{ "cells": [ { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# DeepInvention" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Reference\n", "* [Asking RNNs+LTSMs: What Would Mozart Write?](http://www.wise.io/tech/asking-rnn-and-ltsm-what-would-mozart-write)\n", "* [deep-learning/Anna_KaRNNa.ipynb at master · udacity/deep-learning](https://github.com/udacity/deep-learning/blob/master/intro-to-rnns/Anna_KaRNNa.ipynb)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## music21 UserSetting\n", "* http://web.mit.edu/music21/doc/tutorials/environment.html#environment\n", "* [music21](https://gist.github.com/Vesnica/f8862277e4e3a27593f4ca300eedf07e)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Install \n", "\n", " sudo apt install musescore timidity lilypond\n", " pip install music21 matplotlib scipy tensorflow\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from music21 import *" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "deletable": true, "editable": true, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "'/home/tsu-nera/.music21rc'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "us = environment.UserSettings()\n", "us.getSettingsPath()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "#us[\"musicxmlPath\"] = \"/usr/bin/gedit\"\n", "us[\"musicxmlPath\"] = \"/usr/bin/musescore\"\n", "us[\"midiPath\"] = \"/usr/bin/timidity\"\n", "us[\"showFormat\"] = \"lilypond\"\n", "us[\"writeFormat\"] = \"lilypond\"\n", "us[\"musescoreDirectPNGPath\"] = \"/usr/bin/musescore\"" ] 
}, { "cell_type": "markdown", "metadata": { "collapsed": true, "deletable": true, "editable": true }, "source": [ "## Prepare Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "!mkdir composer" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "import glob\n", "REP=\"@\\n\"\n", "def trim_metadata(output_path, glob_path):\n", " comp_txt = open(output_path,\"w\")\n", " ll = glob.glob(glob_path)\n", " for song in ll:\n", " lines = open(song,\"r\").readlines()\n", " out = []\n", " found_first = False\n", " for l in lines:\n", " if l.startswith(\"=\"):\n", " ## new measure, replace the measure with the @ sign, not part of humdrum\n", " out.append(REP)\n", " found_first = True\n", " continue\n", " if not found_first:\n", " ## keep going until we find the end of the header and metadata\n", " continue\n", " if l.startswith(\"!\"):\n", " ## ignore comments\n", " continue\n", " out.append(l)\n", " comp_txt.writelines(out)\n", " comp_txt.close()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Get Data from KernScore\n", "* [KernScores](http://kern.humdrum.org/)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "%mkdir kernscore\n", "%mkdir kernscore/bach" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "from urllib.request import urlopen\n", "for i in range(1,15+1):\n", " filename = \"inven{0:02d}.krn\".format(i)\n", " file = urlopen(\"http://kern.humdrum.org/cgi-bin/ksdata?l=osu/classical/bach/inventions&file=%s&f=kern\"%filename)\n", " with open(\"kernscore/bach/\"+filename,'wb') as output:\n", " 
output.write(file.read())" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "output_path = \"composer/bach.txt\"\n", "glob_path = \"kernscore/bach/*.krn\"\n", "trim_metadata(output_path, glob_path)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true, "deletable": true, "editable": true }, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import time\n", "from collections import namedtuple\n", "\n", "import numpy as np\n", "import tensorflow as tf" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "filename = 'composer/bach.txt'\n", "with open(filename, 'r') as f:\n", " text=f.read()\n", "vocab = set(text)\n", "vocab_to_int = {c: i for i, c in enumerate(vocab)}\n", "int_to_vocab = dict(enumerate(vocab))\n", "encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "data": { "text/plain": [ "'@\\n4.r\\t16dL\\n.\\t16e\\n.\\t16f\\n.\\t16g\\n.\\t16a\\n.\\t16b-J\\n@\\n4.r\\t1'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "text[:50]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "data": { "text/plain": [ "array([11, 4, 8, 31, 15, 35, 7, 3, 27, 6, 4, 31, 35, 7, 3, 12, 4,\n", " 31, 35, 7, 3, 39, 4, 31, 35, 7, 3, 24, 4, 31, 35, 7, 3, 2,\n", " 4, 31, 35, 7, 3, 9, 13, 1, 4, 11, 4, 8, 31, 15, 35, 7, 3,\n", " 22, 32, 6, 4, 31, 35, 7, 3, 9, 13, 4, 31, 35, 7, 3, 2, 4,\n", " 31, 35, 7, 3, 24, 4, 31, 35, 7, 3, 39, 4, 31, 35, 7, 3, 12,\n", " 1, 4, 11, 4, 7, 3, 41, 6, 
35, 21, 39, 6, 4, 7, 3], dtype=int32)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "encoded[:100]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "data": { "text/plain": [ "43" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vocab_size = len(vocab)\n", "vocab_size" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Preprocess" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def get_batches(arr, n_seqs, n_steps):\n", " '''Create a generator that returns batches of size\n", " n_seqs x n_steps from arr.\n", " \n", " Arguments\n", " ---------\n", " arr: Array you want to make batches from\n", " n_seqs: Batch size, the number of sequences per batch\n", " n_steps: Number of sequence steps per batch\n", " '''\n", " # Get the batch size and number of batches we can make\n", " batch_size = n_seqs * n_steps\n", " n_batches = len(arr)//batch_size\n", " \n", " # Keep only enough characters to make full batches\n", " arr = arr[:n_batches * batch_size]\n", " \n", " # Reshape into n_seqs rows\n", " arr = arr.reshape((n_seqs, -1))\n", " \n", " for n in range(0, arr.shape[1], n_steps):\n", " # The features\n", " x = arr[:, n:n+n_steps]\n", " # The targets, shifted by one\n", " y = np.zeros_like(x)\n", " y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]\n", " yield x, y" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "batches = get_batches(encoded, 10, 50)\n", "x, y = next(batches)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ "x\n", " [[11 4 8 31 15 35 7 3 27 6]\n", " [35 7 3 24 24 42 5 4 7 3]\n", " [ 7 3 22 22 32 4 31 35 7 3]\n", " [ 9 9 4 7 3 38 35 31 4 11]\n", " [ 3 24 4 7 3 33 35 7 3 39]\n", " [ 2 2 4 7 3 27 35 31 4 7]\n", " [16 18 41 32 35 7 3 39 32 1]\n", " [35 31 4 7 3 38 35 8 9 13]\n", " [27 1 4 11 4 7 3 33 6 35]\n", " [ 7 3 24 4 11 4 7 3 0 35]]\n", "\n", "y\n", " [[ 4 8 31 15 35 7 3 27 6 4]\n", " [ 7 3 24 24 42 5 4 7 3 36]\n", " [ 3 22 22 32 4 31 35 7 3 2]\n", " [ 9 4 7 3 38 35 31 4 11 4]\n", " [24 4 7 3 33 35 7 3 39 32]\n", " [ 2 4 7 3 27 35 31 4 7 3]\n", " [18 41 32 35 7 3 39 32 1 4]\n", " [31 4 7 3 38 35 8 9 13 19]\n", " [ 1 4 11 4 7 3 33 6 35 21]\n", " [ 3 24 4 11 4 7 3 0 35 21]]\n" ] } ], "source": [ "print('x\\n', x[:10, :10])\n", "print('\\ny\\n', y[:10, :10])" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Build Model" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def build_inputs(batch_size, num_steps):\n", " ''' Define placeholders for inputs, targets, and dropout \n", " \n", " Arguments\n", " ---------\n", " batch_size: Batch size, number of sequences per batch\n", " num_steps: Number of sequence steps in a batch\n", " \n", " '''\n", " # Declare placeholders we'll feed into the graph\n", " inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')\n", " targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')\n", " \n", " # Keep probability placeholder for drop out layers\n", " keep_prob = tf.placeholder(tf.float32, name='keep_prob')\n", " \n", " return inputs, targets, keep_prob" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def build_lstm(lstm_size, num_layers, batch_size, keep_prob):\n", " ''' Build LSTM cell.\n", " \n", " Arguments\n", " ---------\n", " 
keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability\n", " lstm_size: Size of the hidden layers in the LSTM cells\n", " num_layers: Number of LSTM layers\n", " batch_size: Batch size\n", "\n", " '''\n", " ### Build the LSTM Cell\n", " # Use a basic LSTM cell\n", " lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)\n", " \n", " # Add dropout to the cell\n", " drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)\n", " \n", " # Stack up multiple LSTM layers, for deep learning\n", " cell = tf.contrib.rnn.MultiRNNCell([drop] * num_layers)\n", " initial_state = cell.zero_state(batch_size, tf.float32)\n", " \n", " return cell, initial_state\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def build_output(lstm_output, in_size, out_size):\n", " ''' Build a softmax layer, return the softmax output and logits.\n", " \n", " Arguments\n", " ---------\n", " \n", " lstm_output: Input tensor, the output from the LSTM layers\n", " in_size: Size of the input tensor, for example, size of the LSTM cells\n", " out_size: Size of this softmax layer\n", " \n", " '''\n", "\n", " # Reshape output so it's a bunch of rows, one row for each step for each sequence.\n", " # That is, the shape should be batch_size*num_steps rows by lstm_size columns\n", " seq_output = tf.concat(lstm_output, axis=1)\n", " x = tf.reshape(seq_output, [-1, in_size])\n", " \n", " # Connect the RNN outputs to a softmax layer\n", " with tf.variable_scope('softmax'):\n", " softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))\n", " softmax_b = tf.Variable(tf.zeros(out_size))\n", " \n", " # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch\n", " # of rows of logit outputs, one for each step and sequence\n", " logits = tf.matmul(x, softmax_w) + softmax_b\n", " \n", " # Use softmax to get the probabilities for predicted characters\n", " out = tf.nn.softmax(logits, 
name='predictions')\n", " \n", " return out, logits" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def build_loss(logits, targets, lstm_size, num_classes):\n", " ''' Calculate the loss from the logits and the targets.\n", " \n", " Arguments\n", " ---------\n", " logits: Logits from final fully connected layer\n", " targets: Targets for supervised learning\n", " lstm_size: Number of LSTM hidden units\n", " num_classes: Number of classes in targets\n", " \n", " '''\n", " \n", " # One-hot encode targets and reshape to match logits, one row per batch_size per step\n", " y_one_hot = tf.one_hot(targets, num_classes)\n", " y_reshaped = tf.reshape(y_one_hot, logits.get_shape())\n", " \n", " # Softmax cross entropy loss\n", " loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)\n", " loss = tf.reduce_mean(loss)\n", " return loss" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def build_optimizer(loss, learning_rate, grad_clip):\n", " ''' Build optimizer for training, using gradient clipping.\n", " \n", " Arguments:\n", " loss: Network loss\n", " learning_rate: Learning rate for optimizer\n", " grad_clip: Threshold for clipping the global norm of the gradients\n", " \n", " '''\n", " \n", " # Optimizer for training, using gradient clipping to control exploding gradients\n", " tvars = tf.trainable_variables()\n", " grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)\n", " train_op = tf.train.AdamOptimizer(learning_rate)\n", " optimizer = train_op.apply_gradients(zip(grads, tvars))\n", " \n", " return optimizer" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "class CharRNN:\n", " \n", " def __init__(self, num_classes, batch_size=64, num_steps=50, \n", " lstm_size=128, num_layers=2, 
learning_rate=0.001, \n", " grad_clip=5, sampling=False):\n", " \n", " # When we're using this network for sampling later, we'll be passing in\n", " # one character at a time, so providing an option for that\n", " if sampling == True:\n", " batch_size, num_steps = 1, 1\n", " else:\n", " batch_size, num_steps = batch_size, num_steps\n", "\n", " tf.reset_default_graph()\n", " \n", " # Build the input placeholder tensors\n", " self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)\n", "\n", " # Build the LSTM cell\n", " cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)\n", "\n", " ### Run the data through the RNN layers\n", " # First, one-hot encode the input tokens\n", " x_one_hot = tf.one_hot(self.inputs, num_classes)\n", " \n", " # Run each sequence step through the RNN and collect the outputs\n", " outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)\n", " self.final_state = state\n", " \n", " # Get softmax predictions and logits\n", " self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)\n", " \n", " # Loss and optimizer (with gradient clipping)\n", " self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)\n", " self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Training" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "batch_size = 10\n", "num_steps = 10 \n", "lstm_size = 512\n", "num_layers = 2\n", "learning_rate = 0.001\n", "keep_prob = 0.5" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, "editable": true, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch: 1/20... Training Step: 1... 
Training loss: 3.7620... 0.1642 sec/batch\n", "Epoch: 1/20... Training Step: 2... Training loss: 3.7256... 0.1233 sec/batch\n", "Epoch: 1/20... Training Step: 3... Training loss: 3.6567... 0.1201 sec/batch\n", "Epoch: 1/20... Training Step: 4... Training loss: 3.4860... 0.1188 sec/batch\n", "Epoch: 1/20... Training Step: 5... Training loss: 3.2803... 0.1250 sec/batch\n", "Epoch: 1/20... Training Step: 6... Training loss: 3.6022... 0.1220 sec/batch\n", "Epoch: 1/20... Training Step: 7... Training loss: 4.0865... 0.1183 sec/batch\n", "Epoch: 1/20... Training Step: 8... Training loss: 3.1127... 0.1227 sec/batch\n", "Epoch: 1/20... Training Step: 9... Training loss: 3.3907... 0.1215 sec/batch\n", "Epoch: 1/20... Training Step: 10... Training loss: 3.4856... 0.1189 sec/batch\n", "Epoch: 1/20... Training Step: 11... Training loss: 3.4883... 0.1214 sec/batch\n", "Epoch: 1/20... Training Step: 12... Training loss: 3.3462... 0.1254 sec/batch\n", "Epoch: 1/20... Training Step: 13... Training loss: 3.3203... 0.1224 sec/batch\n", "Epoch: 1/20... Training Step: 14... Training loss: 3.1857... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 15... Training loss: 3.1654... 0.1260 sec/batch\n", "Epoch: 1/20... Training Step: 16... Training loss: 3.3089... 0.1259 sec/batch\n", "Epoch: 1/20... Training Step: 17... Training loss: 3.0935... 0.1218 sec/batch\n", "Epoch: 1/20... Training Step: 18... Training loss: 3.0318... 0.1229 sec/batch\n", "Epoch: 1/20... Training Step: 19... Training loss: 3.1196... 0.1216 sec/batch\n", "Epoch: 1/20... Training Step: 20... Training loss: 2.9132... 0.1198 sec/batch\n", "Epoch: 1/20... Training Step: 21... Training loss: 3.1271... 0.1283 sec/batch\n", "Epoch: 1/20... Training Step: 22... Training loss: 3.1013... 0.1219 sec/batch\n", "Epoch: 1/20... Training Step: 23... Training loss: 3.1496... 0.1247 sec/batch\n", "Epoch: 1/20... Training Step: 24... Training loss: 3.0565... 0.1232 sec/batch\n", "Epoch: 1/20... Training Step: 25... 
Training loss: 2.9929... 0.1239 sec/batch\n", "Epoch: 1/20... Training Step: 26... Training loss: 3.0869... 0.1269 sec/batch\n", "Epoch: 1/20... Training Step: 27... Training loss: 3.2220... 0.1225 sec/batch\n", "Epoch: 1/20... Training Step: 28... Training loss: 3.1725... 0.1248 sec/batch\n", "Epoch: 1/20... Training Step: 29... Training loss: 3.0635... 0.1241 sec/batch\n", "Epoch: 1/20... Training Step: 30... Training loss: 3.1275... 0.1208 sec/batch\n", "Epoch: 1/20... Training Step: 31... Training loss: 2.8966... 0.1237 sec/batch\n", "Epoch: 1/20... Training Step: 32... Training loss: 2.9878... 0.1236 sec/batch\n", "Epoch: 1/20... Training Step: 33... Training loss: 3.0631... 0.1196 sec/batch\n", "Epoch: 1/20... Training Step: 34... Training loss: 3.0332... 0.1258 sec/batch\n", "Epoch: 1/20... Training Step: 35... Training loss: 3.0228... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 36... Training loss: 2.9531... 0.1221 sec/batch\n", "Epoch: 1/20... Training Step: 37... Training loss: 3.0487... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 38... Training loss: 2.9803... 0.1217 sec/batch\n", "Epoch: 1/20... Training Step: 39... Training loss: 2.9113... 0.1199 sec/batch\n", "Epoch: 1/20... Training Step: 40... Training loss: 3.1494... 0.1242 sec/batch\n", "Epoch: 1/20... Training Step: 41... Training loss: 2.8697... 0.1210 sec/batch\n", "Epoch: 1/20... Training Step: 42... Training loss: 2.9242... 0.1231 sec/batch\n", "Epoch: 1/20... Training Step: 43... Training loss: 3.0483... 0.1218 sec/batch\n", "Epoch: 1/20... Training Step: 44... Training loss: 2.8306... 0.1232 sec/batch\n", "Epoch: 1/20... Training Step: 45... Training loss: 2.8007... 0.1192 sec/batch\n", "Epoch: 1/20... Training Step: 46... Training loss: 2.9705... 0.1237 sec/batch\n", "Epoch: 1/20... Training Step: 47... Training loss: 2.8790... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 48... Training loss: 2.9677... 0.1245 sec/batch\n", "Epoch: 1/20... Training Step: 49... 
Training loss: 2.8278... 0.1245 sec/batch\n", "Epoch: 1/20... Training Step: 50... Training loss: 3.0830... 0.1210 sec/batch\n", "Epoch: 1/20... Training Step: 51... Training loss: 2.8571... 0.1262 sec/batch\n", "Epoch: 1/20... Training Step: 52... Training loss: 2.9284... 0.1210 sec/batch\n", "Epoch: 1/20... Training Step: 53... Training loss: 3.1083... 0.1261 sec/batch\n", "Epoch: 1/20... Training Step: 54... Training loss: 3.0061... 0.1195 sec/batch\n", "Epoch: 1/20... Training Step: 55... Training loss: 2.9092... 0.1215 sec/batch\n", "Epoch: 1/20... Training Step: 56... Training loss: 2.9404... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 57... Training loss: 2.9676... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 58... Training loss: 2.9035... 0.1225 sec/batch\n", "Epoch: 1/20... Training Step: 59... Training loss: 2.8589... 0.1242 sec/batch\n", "Epoch: 1/20... Training Step: 60... Training loss: 2.8798... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 61... Training loss: 2.9282... 0.1190 sec/batch\n", "Epoch: 1/20... Training Step: 62... Training loss: 2.9433... 0.1257 sec/batch\n", "Epoch: 1/20... Training Step: 63... Training loss: 2.8242... 0.1214 sec/batch\n", "Epoch: 1/20... Training Step: 64... Training loss: 2.8128... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 65... Training loss: 2.8223... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 66... Training loss: 2.9849... 0.1269 sec/batch\n", "Epoch: 1/20... Training Step: 67... Training loss: 2.9266... 0.1241 sec/batch\n", "Epoch: 1/20... Training Step: 68... Training loss: 2.9161... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 69... Training loss: 2.8079... 0.1184 sec/batch\n", "Epoch: 1/20... Training Step: 70... Training loss: 2.8682... 0.1167 sec/batch\n", "Epoch: 1/20... Training Step: 71... Training loss: 2.8148... 0.1274 sec/batch\n", "Epoch: 1/20... Training Step: 72... Training loss: 2.9319... 0.1277 sec/batch\n", "Epoch: 1/20... Training Step: 73... 
Training loss: 2.8397... 0.1169 sec/batch\n", "Epoch: 1/20... Training Step: 74... Training loss: 2.8626... 0.1186 sec/batch\n", "Epoch: 1/20... Training Step: 75... Training loss: 3.0676... 0.1178 sec/batch\n", "Epoch: 1/20... Training Step: 76... Training loss: 2.9233... 0.1202 sec/batch\n", "Epoch: 1/20... Training Step: 77... Training loss: 2.7613... 0.1244 sec/batch\n", "Epoch: 1/20... Training Step: 78... Training loss: 2.9925... 0.1241 sec/batch\n", "Epoch: 1/20... Training Step: 79... Training loss: 2.7884... 0.1273 sec/batch\n", "Epoch: 1/20... Training Step: 80... Training loss: 2.7235... 0.1301 sec/batch\n", "Epoch: 1/20... Training Step: 81... Training loss: 2.9118... 0.1195 sec/batch\n", "Epoch: 1/20... Training Step: 82... Training loss: 2.9589... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 83... Training loss: 2.7112... 0.1238 sec/batch\n", "Epoch: 1/20... Training Step: 84... Training loss: 2.8291... 0.1217 sec/batch\n", "Epoch: 1/20... Training Step: 85... Training loss: 2.7706... 0.1315 sec/batch\n", "Epoch: 1/20... Training Step: 86... Training loss: 2.6703... 0.1237 sec/batch\n", "Epoch: 1/20... Training Step: 87... Training loss: 2.7684... 0.1229 sec/batch\n", "Epoch: 1/20... Training Step: 88... Training loss: 2.8455... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 89... Training loss: 2.9805... 0.1220 sec/batch\n", "Epoch: 1/20... Training Step: 90... Training loss: 2.6880... 0.1194 sec/batch\n", "Epoch: 1/20... Training Step: 91... Training loss: 2.8288... 0.1221 sec/batch\n", "Epoch: 1/20... Training Step: 92... Training loss: 2.8261... 0.1208 sec/batch\n", "Epoch: 1/20... Training Step: 93... Training loss: 2.6363... 0.1211 sec/batch\n", "Epoch: 1/20... Training Step: 94... Training loss: 2.8438... 0.1238 sec/batch\n", "Epoch: 1/20... Training Step: 95... Training loss: 2.5802... 0.1219 sec/batch\n", "Epoch: 1/20... Training Step: 96... Training loss: 2.7704... 0.1238 sec/batch\n", "Epoch: 1/20... Training Step: 97... 
Training loss: 2.9868... 0.1239 sec/batch\n", "Epoch: 1/20... Training Step: 98... Training loss: 2.8030... 0.1260 sec/batch\n", "Epoch: 1/20... Training Step: 99... Training loss: 2.6999... 0.1219 sec/batch\n", "Epoch: 1/20... Training Step: 100... Training loss: 2.7008... 0.1196 sec/batch\n", "Epoch: 1/20... Training Step: 101... Training loss: 2.7749... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 102... Training loss: 2.8638... 0.1252 sec/batch\n", "Epoch: 1/20... Training Step: 103... Training loss: 2.7618... 0.1239 sec/batch\n", "Epoch: 1/20... Training Step: 104... Training loss: 2.6063... 0.1229 sec/batch\n", "Epoch: 1/20... Training Step: 105... Training loss: 2.6149... 0.1242 sec/batch\n", "Epoch: 1/20... Training Step: 106... Training loss: 2.9139... 0.1234 sec/batch\n", "Epoch: 1/20... Training Step: 107... Training loss: 2.9257... 0.1232 sec/batch\n", "Epoch: 1/20... Training Step: 108... Training loss: 2.9118... 0.1231 sec/batch\n", "Epoch: 1/20... Training Step: 109... Training loss: 2.7397... 0.1236 sec/batch\n", "Epoch: 1/20... Training Step: 110... Training loss: 2.7949... 0.1237 sec/batch\n", "Epoch: 1/20... Training Step: 111... Training loss: 2.7819... 0.1240 sec/batch\n", "Epoch: 1/20... Training Step: 112... Training loss: 2.7028... 0.1237 sec/batch\n", "Epoch: 1/20... Training Step: 113... Training loss: 2.6550... 0.1264 sec/batch\n", "Epoch: 1/20... Training Step: 114... Training loss: 2.8067... 0.1234 sec/batch\n", "Epoch: 1/20... Training Step: 115... Training loss: 2.6431... 0.1203 sec/batch\n", "Epoch: 1/20... Training Step: 116... Training loss: 2.5301... 0.1255 sec/batch\n", "Epoch: 1/20... Training Step: 117... Training loss: 2.6822... 0.1195 sec/batch\n", "Epoch: 1/20... Training Step: 118... Training loss: 2.6422... 0.1225 sec/batch\n", "Epoch: 1/20... Training Step: 119... Training loss: 2.5233... 0.1210 sec/batch\n", "Epoch: 1/20... Training Step: 120... Training loss: 2.3074... 0.1192 sec/batch\n", "Epoch: 1/20... 
Training Step: 121... Training loss: 2.4839... 0.1240 sec/batch\n", "Epoch: 1/20... Training Step: 122... Training loss: 2.5947... 0.1242 sec/batch\n", "Epoch: 1/20... Training Step: 123... Training loss: 2.6883... 0.1250 sec/batch\n", "Epoch: 1/20... Training Step: 124... Training loss: 2.4409... 0.1222 sec/batch\n", "Epoch: 1/20... Training Step: 125... Training loss: 2.5898... 0.1205 sec/batch\n", "Epoch: 1/20... Training Step: 126... Training loss: 2.3660... 0.1183 sec/batch\n", "Epoch: 1/20... Training Step: 127... Training loss: 2.3348... 0.1216 sec/batch\n", "Epoch: 1/20... Training Step: 128... Training loss: 2.5245... 0.1231 sec/batch\n", "Epoch: 1/20... Training Step: 129... Training loss: 2.5923... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 130... Training loss: 2.4242... 0.1249 sec/batch\n", "Epoch: 1/20... Training Step: 131... Training loss: 2.8288... 0.1198 sec/batch\n", "Epoch: 1/20... Training Step: 132... Training loss: 2.3149... 0.1204 sec/batch\n", "Epoch: 1/20... Training Step: 133... Training loss: 2.3525... 0.1230 sec/batch\n", "Epoch: 1/20... Training Step: 134... Training loss: 2.3290... 0.1236 sec/batch\n", "Epoch: 1/20... Training Step: 135... Training loss: 2.1879... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 136... Training loss: 2.1351... 0.1202 sec/batch\n", "Epoch: 1/20... Training Step: 137... Training loss: 2.4319... 0.1197 sec/batch\n", "Epoch: 1/20... Training Step: 138... Training loss: 2.4167... 0.1169 sec/batch\n", "Epoch: 1/20... Training Step: 139... Training loss: 2.2640... 0.1190 sec/batch\n", "Epoch: 1/20... Training Step: 140... Training loss: 2.3276... 0.1220 sec/batch\n", "Epoch: 1/20... Training Step: 141... Training loss: 2.4944... 0.1236 sec/batch\n", "Epoch: 1/20... Training Step: 142... Training loss: 2.3255... 0.1240 sec/batch\n", "Epoch: 1/20... Training Step: 143... Training loss: 2.3086... 0.1197 sec/batch\n", "Epoch: 1/20... Training Step: 144... Training loss: 2.2411... 
0.1262 sec/batch\n", "Epoch: 1/20... Training Step: 145... Training loss: 2.3505... 0.1224 sec/batch\n", "Epoch: 1/20... Training Step: 146... Training loss: 2.2917... 0.1205 sec/batch\n", "Epoch: 1/20... Training Step: 147... Training loss: 2.4639... 0.1219 sec/batch\n", "Epoch: 1/20... Training Step: 148... Training loss: 2.0835... 0.1224 sec/batch\n", "Epoch: 1/20... Training Step: 149... Training loss: 2.2542... 0.1188 sec/batch\n", "Epoch: 1/20... Training Step: 150... Training loss: 2.3682... 0.1219 sec/batch\n", "Epoch: 1/20... Training Step: 151... Training loss: 2.2879... 0.1237 sec/batch\n", "Epoch: 1/20... Training Step: 152... Training loss: 2.2459... 0.1166 sec/batch\n", "Epoch: 1/20... Training Step: 153... Training loss: 2.4501... 0.1250 sec/batch\n", "Epoch: 1/20... Training Step: 154... Training loss: 2.3604... 0.1238 sec/batch\n", "Epoch: 1/20... Training Step: 155... Training loss: 2.2573... 0.1251 sec/batch\n", "Epoch: 1/20... Training Step: 156... Training loss: 2.1554... 0.1222 sec/batch\n", "Epoch: 1/20... Training Step: 157... Training loss: 1.9763... 0.1264 sec/batch\n", "Epoch: 1/20... Training Step: 158... Training loss: 2.0656... 0.1255 sec/batch\n", "Epoch: 1/20... Training Step: 159... Training loss: 2.0456... 0.1246 sec/batch\n", "Epoch: 1/20... Training Step: 160... Training loss: 2.1463... 0.1206 sec/batch\n", "Epoch: 1/20... Training Step: 161... Training loss: 2.4674... 0.1221 sec/batch\n", "Epoch: 1/20... Training Step: 162... Training loss: 2.1990... 0.1213 sec/batch\n", "Epoch: 1/20... Training Step: 163... Training loss: 2.1825... 0.1241 sec/batch\n", "Epoch: 1/20... Training Step: 164... Training loss: 2.0409... 0.1204 sec/batch\n", "Epoch: 1/20... Training Step: 165... Training loss: 2.1511... 0.1212 sec/batch\n", "Epoch: 1/20... Training Step: 166... Training loss: 2.0999... 0.1215 sec/batch\n", "Epoch: 1/20... Training Step: 167... Training loss: 2.0156... 0.1248 sec/batch\n", "Epoch: 1/20... Training Step: 168... 
Training loss: 2.2786... 0.1198 sec/batch\n", "Epoch: 1/20... Training Step: 169... Training loss: 2.2583... 0.1214 sec/batch\n", "Epoch: 1/20... Training Step: 170... Training loss: 2.1629... 0.1261 sec/batch\n", "Epoch: 1/20... Training Step: 171... Training loss: 2.1873... 0.1271 sec/batch\n", "Epoch: 1/20... Training Step: 172... Training loss: 2.1109... 0.1211 sec/batch\n", "Epoch: 1/20... Training Step: 173... Training loss: 2.2487... 0.1229 sec/batch\n", "Epoch: 1/20... Training Step: 174... Training loss: 2.1224... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 175... Training loss: 2.1910... 0.1261 sec/batch\n", "Epoch: 1/20... Training Step: 176... Training loss: 1.9140... 0.1263 sec/batch\n", "Epoch: 1/20... Training Step: 177... Training loss: 1.9269... 0.1381 sec/batch\n", "Epoch: 1/20... Training Step: 178... Training loss: 2.2543... 0.1342 sec/batch\n", "Epoch: 1/20... Training Step: 179... Training loss: 2.0510... 0.1197 sec/batch\n", "Epoch: 1/20... Training Step: 180... Training loss: 2.2251... 0.1231 sec/batch\n", "Epoch: 1/20... Training Step: 181... Training loss: 2.0182... 0.1236 sec/batch\n", "Epoch: 1/20... Training Step: 182... Training loss: 2.1679... 0.1253 sec/batch\n", "Epoch: 1/20... Training Step: 183... Training loss: 2.1451... 0.1212 sec/batch\n", "Epoch: 1/20... Training Step: 184... Training loss: 2.1111... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 185... Training loss: 2.2130... 0.1185 sec/batch\n", "Epoch: 1/20... Training Step: 186... Training loss: 1.8731... 0.1240 sec/batch\n", "Epoch: 1/20... Training Step: 187... Training loss: 2.0875... 0.1259 sec/batch\n", "Epoch: 1/20... Training Step: 188... Training loss: 1.7481... 0.1211 sec/batch\n", "Epoch: 1/20... Training Step: 189... Training loss: 2.1269... 0.1285 sec/batch\n", "Epoch: 1/20... Training Step: 190... Training loss: 1.8619... 0.1321 sec/batch\n", "Epoch: 1/20... Training Step: 191... Training loss: 1.9233... 0.1252 sec/batch\n", "Epoch: 1/20... 
Training Step: 192... Training loss: 2.0531... 0.1201 sec/batch\n", "Epoch: 1/20... Training Step: 193... Training loss: 1.9795... 0.1364 sec/batch\n", "Epoch: 1/20... Training Step: 194... Training loss: 1.8791... 0.1308 sec/batch\n", "Epoch: 1/20... Training Step: 195... Training loss: 2.2336... 0.1297 sec/batch\n", "Epoch: 1/20... Training Step: 196... Training loss: 2.0534... 0.1352 sec/batch\n", "Epoch: 1/20... Training Step: 197... Training loss: 1.8241... 0.1365 sec/batch\n", "Epoch: 1/20... Training Step: 198... Training loss: 2.0393... 0.1306 sec/batch\n", "Epoch: 1/20... Training Step: 199... Training loss: 1.7450... 0.1350 sec/batch\n", "Epoch: 1/20... Training Step: 200... Training loss: 1.7357... 0.1320 sec/batch\n", "Epoch: 1/20... Training Step: 201... Training loss: 1.9492... 0.1283 sec/batch\n", "Epoch: 1/20... Training Step: 202... Training loss: 1.9008... 0.1301 sec/batch\n", "Epoch: 1/20... Training Step: 203... Training loss: 1.8058... 0.1433 sec/batch\n", "Epoch: 1/20... Training Step: 204... Training loss: 2.0252... 0.1273 sec/batch\n", "Epoch: 1/20... Training Step: 205... Training loss: 1.7999... 0.1380 sec/batch\n", "Epoch: 1/20... Training Step: 206... Training loss: 1.7063... 0.1327 sec/batch\n", "Epoch: 1/20... Training Step: 207... Training loss: 1.8999... 0.1324 sec/batch\n", "Epoch: 1/20... Training Step: 208... Training loss: 1.8881... 0.1335 sec/batch\n", "Epoch: 1/20... Training Step: 209... Training loss: 1.8977... 0.1328 sec/batch\n", "Epoch: 1/20... Training Step: 210... Training loss: 2.0043... 0.1271 sec/batch\n", "Epoch: 1/20... Training Step: 211... Training loss: 1.5164... 0.1328 sec/batch\n", "Epoch: 1/20... Training Step: 212... Training loss: 2.0064... 0.1357 sec/batch\n", "Epoch: 1/20... Training Step: 213... Training loss: 1.9792... 0.1264 sec/batch\n", "Epoch: 1/20... Training Step: 214... Training loss: 1.7379... 0.1256 sec/batch\n", "Epoch: 1/20... Training Step: 215... Training loss: 2.1222... 
0.1225 sec/batch\n", "Epoch: 1/20... Training Step: 216... Training loss: 1.7652... 0.1208 sec/batch\n", "Epoch: 1/20... Training Step: 217... Training loss: 2.0598... 0.1341 sec/batch\n", "Epoch: 1/20... Training Step: 218... Training loss: 1.9019... 0.1372 sec/batch\n", "Epoch: 1/20... Training Step: 219... Training loss: 2.0017... 0.1505 sec/batch\n", "Epoch: 1/20... Training Step: 220... Training loss: 1.9209... 0.1393 sec/batch\n", "Epoch: 1/20... Training Step: 221... Training loss: 1.8711... 0.1373 sec/batch\n", "Epoch: 1/20... Training Step: 222... Training loss: 2.1657... 0.1415 sec/batch\n", "Epoch: 1/20... Training Step: 223... Training loss: 2.0872... 0.1364 sec/batch\n", "Epoch: 1/20... Training Step: 224... Training loss: 2.0484... 0.1354 sec/batch\n", "Epoch: 1/20... Training Step: 225... Training loss: 1.9232... 0.1396 sec/batch\n", "Epoch: 1/20... Training Step: 226... Training loss: 2.0963... 0.1369 sec/batch\n", "Epoch: 1/20... Training Step: 227... Training loss: 2.0884... 0.1375 sec/batch\n", "Epoch: 1/20... Training Step: 228... Training loss: 1.9220... 0.1246 sec/batch\n", "Epoch: 1/20... Training Step: 229... Training loss: 1.9084... 0.1239 sec/batch\n", "Epoch: 1/20... Training Step: 230... Training loss: 2.1362... 0.1292 sec/batch\n", "Epoch: 1/20... Training Step: 231... Training loss: 1.8550... 0.1276 sec/batch\n", "Epoch: 1/20... Training Step: 232... Training loss: 1.8892... 0.1224 sec/batch\n", "Epoch: 1/20... Training Step: 233... Training loss: 2.2566... 0.1180 sec/batch\n", "Epoch: 1/20... Training Step: 234... Training loss: 1.9981... 0.1170 sec/batch\n", "Epoch: 1/20... Training Step: 235... Training loss: 2.0485... 0.1258 sec/batch\n", "Epoch: 1/20... Training Step: 236... Training loss: 1.8156... 0.1232 sec/batch\n", "Epoch: 1/20... Training Step: 237... Training loss: 2.1858... 0.1201 sec/batch\n", "Epoch: 1/20... Training Step: 238... Training loss: 1.8726... 0.1269 sec/batch\n", "Epoch: 1/20... Training Step: 239... 
Training loss: 1.9511... 0.1295 sec/batch\n", "Epoch: 1/20... Training Step: 240... Training loss: 2.1758... 0.1268 sec/batch\n", "Epoch: 1/20... Training Step: 241... Training loss: 1.9088... 0.1327 sec/batch\n", "Epoch: 1/20... Training Step: 242... Training loss: 2.0303... 0.1412 sec/batch\n", "Epoch: 1/20... Training Step: 243... Training loss: 2.1431... 0.1331 sec/batch\n", "Epoch: 1/20... Training Step: 244... Training loss: 1.8868... 0.1342 sec/batch\n", "Epoch: 1/20... Training Step: 245... Training loss: 1.8554... 0.1274 sec/batch\n", "Epoch: 1/20... Training Step: 246... Training loss: 1.6133... 0.1356 sec/batch\n", "Epoch: 1/20... Training Step: 247... Training loss: 1.8370... 0.1277 sec/batch\n", "Epoch: 1/20... Training Step: 248... Training loss: 1.9527... 0.1214 sec/batch\n", "Epoch: 1/20... Training Step: 249... Training loss: 1.8062... 0.1156 sec/batch\n", "Epoch: 1/20... Training Step: 250... Training loss: 1.7428... 0.1226 sec/batch\n", "Epoch: 1/20... Training Step: 251... Training loss: 1.9264... 0.1285 sec/batch\n", "Epoch: 1/20... Training Step: 252... Training loss: 1.8250... 0.1279 sec/batch\n", "Epoch: 1/20... Training Step: 253... Training loss: 1.7278... 0.1298 sec/batch\n", "Epoch: 1/20... Training Step: 254... Training loss: 2.1920... 0.1351 sec/batch\n", "Epoch: 1/20... Training Step: 255... Training loss: 1.8406... 0.1345 sec/batch\n", "Epoch: 1/20... Training Step: 256... Training loss: 1.7293... 0.1367 sec/batch\n", "Epoch: 1/20... Training Step: 257... Training loss: 1.9573... 0.1311 sec/batch\n", "Epoch: 1/20... Training Step: 258... Training loss: 1.9288... 0.1309 sec/batch\n", "Epoch: 1/20... Training Step: 259... Training loss: 1.9979... 0.1295 sec/batch\n", "Epoch: 1/20... Training Step: 260... Training loss: 1.9598... 0.1355 sec/batch\n", "Epoch: 1/20... Training Step: 261... Training loss: 1.9618... 0.1301 sec/batch\n", "Epoch: 1/20... Training Step: 262... Training loss: 1.9427... 0.1299 sec/batch\n", "Epoch: 1/20... 
Training Step: 263... Training loss: 1.9407... 0.1284 sec/batch\n", "Epoch: 1/20... Training Step: 264... Training loss: 1.9523... 0.1208 sec/batch\n", "Epoch: 1/20... Training Step: 265... Training loss: 1.9571... 0.1206 sec/batch\n", "Epoch: 1/20... Training Step: 266... Training loss: 1.8152... 0.1291 sec/batch\n", "Epoch: 1/20... Training Step: 267... Training loss: 2.0836... 0.1350 sec/batch\n", "Epoch: 1/20... Training Step: 268... Training loss: 2.0145... 0.1366 sec/batch\n", "Epoch: 1/20... Training Step: 269... Training loss: 1.9578... 0.1317 sec/batch\n", "Epoch: 1/20... Training Step: 270... Training loss: 2.1346... 0.1203 sec/batch\n", "Epoch: 1/20... Training Step: 271... Training loss: 1.7738... 0.1267 sec/batch\n", "Epoch: 1/20... Training Step: 272... Training loss: 2.0176... 0.1273 sec/batch\n", "Epoch: 1/20... Training Step: 273... Training loss: 1.9272... 0.1310 sec/batch\n", "Epoch: 1/20... Training Step: 274... Training loss: 1.8668... 0.1284 sec/batch\n", "Epoch: 1/20... Training Step: 275... Training loss: 1.9420... 0.1279 sec/batch\n", "Epoch: 1/20... Training Step: 276... Training loss: 1.9878... 0.1259 sec/batch\n", "Epoch: 1/20... Training Step: 277... Training loss: 2.0064... 0.1336 sec/batch\n", "Epoch: 1/20... Training Step: 278... Training loss: 2.0244... 0.1296 sec/batch\n", "Epoch: 1/20... Training Step: 279... Training loss: 1.7891... 0.1286 sec/batch\n", "Epoch: 1/20... Training Step: 280... Training loss: 1.7947... 0.1274 sec/batch\n", "Epoch: 1/20... Training Step: 281... Training loss: 1.6625... 0.1232 sec/batch\n", "Epoch: 1/20... Training Step: 282... Training loss: 1.7442... 0.1244 sec/batch\n", "Epoch: 1/20... Training Step: 283... Training loss: 1.6750... 0.1179 sec/batch\n", "Epoch: 1/20... Training Step: 284... Training loss: 1.8013... 0.1205 sec/batch\n", "Epoch: 1/20... Training Step: 285... Training loss: 1.6533... 0.1234 sec/batch\n", "Epoch: 1/20... Training Step: 286... Training loss: 1.8193... 
0.1230 sec/batch\n", "Epoch: 1/20... Training Step: 287... Training loss: 1.6664... 0.1203 sec/batch\n", "Epoch: 1/20... Training Step: 288... Training loss: 1.8409... 0.1194 sec/batch\n", "Epoch: 1/20... Training Step: 289... Training loss: 2.0103... 0.1231 sec/batch\n", "Epoch: 1/20... Training Step: 290... Training loss: 1.8769... 0.1256 sec/batch\n", "Epoch: 1/20... Training Step: 291... Training loss: 1.9272... 0.1230 sec/batch\n", "Epoch: 1/20... Training Step: 292... Training loss: 1.7780... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 293... Training loss: 1.7624... 0.1267 sec/batch\n", "Epoch: 1/20... Training Step: 294... Training loss: 1.9197... 0.1201 sec/batch\n", "Epoch: 1/20... Training Step: 295... Training loss: 1.8272... 0.1339 sec/batch\n", "Epoch: 1/20... Training Step: 296... Training loss: 2.0636... 0.1272 sec/batch\n", "Epoch: 1/20... Training Step: 297... Training loss: 1.8698... 0.1327 sec/batch\n", "Epoch: 1/20... Training Step: 298... Training loss: 1.8798... 0.1313 sec/batch\n", "Epoch: 1/20... Training Step: 299... Training loss: 1.8173... 0.1287 sec/batch\n", "Epoch: 1/20... Training Step: 300... Training loss: 1.8802... 0.1297 sec/batch\n", "Epoch: 1/20... Training Step: 301... Training loss: 1.8425... 0.1284 sec/batch\n", "Epoch: 1/20... Training Step: 302... Training loss: 1.7539... 0.1234 sec/batch\n", "Epoch: 1/20... Training Step: 303... Training loss: 1.6652... 0.1222 sec/batch\n", "Epoch: 1/20... Training Step: 304... Training loss: 1.9984... 0.1201 sec/batch\n", "Epoch: 1/20... Training Step: 305... Training loss: 1.6832... 0.1223 sec/batch\n", "Epoch: 1/20... Training Step: 306... Training loss: 2.0259... 0.1216 sec/batch\n", "Epoch: 1/20... Training Step: 307... Training loss: 1.7023... 0.1201 sec/batch\n", "Epoch: 1/20... Training Step: 308... Training loss: 2.2779... 0.1259 sec/batch\n", "Epoch: 1/20... Training Step: 309... Training loss: 1.7804... 0.1248 sec/batch\n", "Epoch: 1/20... Training Step: 310... 
Training loss: 2.0357... 0.1211 sec/batch\n", "Epoch: 1/20... Training Step: 311... Training loss: 1.9977... 0.1221 sec/batch\n", "Epoch: 1/20... Training Step: 312... Training loss: 1.8353... 0.1237 sec/batch\n", "Epoch: 1/20... Training Step: 313... Training loss: 1.9002... 0.1264 sec/batch\n", "Epoch: 1/20... Training Step: 314... Training loss: 1.8166... 0.1167 sec/batch\n", "Epoch: 1/20... Training Step: 315... Training loss: 1.5815... 0.1188 sec/batch\n", "Epoch: 1/20... Training Step: 316... Training loss: 1.8202... 0.1207 sec/batch\n", "Epoch: 1/20... Training Step: 317... Training loss: 1.9972... 0.1240 sec/batch\n", "Epoch: 1/20... Training Step: 318... Training loss: 1.6601... 0.1254 sec/batch\n", "Epoch: 1/20... Training Step: 319... Training loss: 1.8317... 0.1218 sec/batch\n", "Epoch: 1/20... Training Step: 320... Training loss: 1.7362... 0.1200 sec/batch\n", "Epoch: 1/20... Training Step: 321... Training loss: 1.7212... 0.1241 sec/batch\n", "Epoch: 1/20... Training Step: 322... Training loss: 1.9143... 0.1230 sec/batch\n", "Epoch: 1/20... Training Step: 323... Training loss: 1.5693... 0.1203 sec/batch\n", "Epoch: 1/20... Training Step: 324... Training loss: 1.5907... 0.1221 sec/batch\n", "Epoch: 1/20... Training Step: 325... Training loss: 1.5188... 0.1176 sec/batch\n", "Epoch: 1/20... Training Step: 326... Training loss: 1.7385... 0.1246 sec/batch\n", "Epoch: 1/20... Training Step: 327... Training loss: 1.7668... 0.1233 sec/batch\n", "Epoch: 1/20... Training Step: 328... Training loss: 1.7833... 0.1234 sec/batch\n", "Epoch: 1/20... Training Step: 329... Training loss: 1.8986... 0.1213 sec/batch\n", "Epoch: 1/20... Training Step: 330... Training loss: 1.7910... 0.1223 sec/batch\n", "Epoch: 1/20... Training Step: 331... Training loss: 1.7676... 0.1231 sec/batch\n", "Epoch: 1/20... Training Step: 332... Training loss: 1.9112... 0.1220 sec/batch\n", "Epoch: 1/20... Training Step: 333... Training loss: 1.8434... 0.1257 sec/batch\n", "Epoch: 1/20... 
Training Step: 334... Training loss: 1.7872... 0.1177 sec/batch\n", "Epoch: 1/20... Training Step: 335... Training loss: 1.9730... 0.1222 sec/batch\n", "Epoch: 1/20... Training Step: 336... Training loss: 1.9150... 0.1211 sec/batch\n", "Epoch: 1/20... Training Step: 337... Training loss: 1.8544... 0.1211 sec/batch\n", "Epoch: 1/20... Training Step: 338... Training loss: 1.9315... 0.1236 sec/batch\n", "Epoch: 1/20... Training Step: 339... Training loss: 1.8512... 0.1244 sec/batch\n", "Epoch: 1/20... Training Step: 340... Training loss: 1.7988... 0.1212 sec/batch\n", "Epoch: 1/20... Training Step: 341... Training loss: 1.6037... 0.1270 sec/batch\n", "Epoch: 1/20... Training Step: 342... Training loss: 1.8011... 0.1232 sec/batch\n", "Epoch: 1/20... Training Step: 343... Training loss: 1.5731... 0.1230 sec/batch\n", "Epoch: 1/20... Training Step: 344... Training loss: 1.9590... 0.1202 sec/batch\n", "Epoch: 1/20... Training Step: 345... Training loss: 1.8489... 0.1183 sec/batch\n", "Epoch: 1/20... Training Step: 346... Training loss: 1.6802... 0.1263 sec/batch\n", "Epoch: 1/20... Training Step: 347... Training loss: 1.5646... 0.1212 sec/batch\n", "Epoch: 1/20... Training Step: 348... Training loss: 1.9530... 0.1227 sec/batch\n", "Epoch: 1/20... Training Step: 349... Training loss: 1.7007... 0.1255 sec/batch\n", "Epoch: 1/20... Training Step: 350... Training loss: 1.8760... 0.1199 sec/batch\n", "Epoch: 1/20... Training Step: 351... Training loss: 1.7809... 0.1218 sec/batch\n", "Epoch: 1/20... Training Step: 352... Training loss: 1.8105... 0.1218 sec/batch\n", "Epoch: 1/20... Training Step: 353... Training loss: 1.6809... 0.1221 sec/batch\n", "Epoch: 1/20... Training Step: 354... Training loss: 1.4776... 0.1270 sec/batch\n", "Epoch: 1/20... Training Step: 355... Training loss: 2.0394... 0.1206 sec/batch\n", "Epoch: 1/20... Training Step: 356... Training loss: 1.7149... 0.1252 sec/batch\n", "Epoch: 1/20... Training Step: 357... Training loss: 1.5545... 
0.1213 sec/batch\n", "Epoch: 1/20... Training Step: 358... Training loss: 1.8659... 0.1208 sec/batch\n", "Epoch: 1/20... Training Step: 359... Training loss: 1.7484... 0.1211 sec/batch\n", "Epoch: 1/20... Training Step: 360... Training loss: 1.6083... 0.1229 sec/batch\n", "Epoch: 1/20... Training Step: 361... Training loss: 1.8397... 0.1193 sec/batch\n", "Epoch: 1/20... Training Step: 362... Training loss: 1.7802... 0.1207 sec/batch\n", "Epoch: 1/20... Training Step: 363... Training loss: 1.8073... 0.1211 sec/batch\n", "Epoch: 1/20... Training Step: 364... Training loss: 1.7364... 0.1175 sec/batch\n", "Epoch: 1/20... Training Step: 365... Training loss: 1.7202... 0.1268 sec/batch\n", "Epoch: 1/20... Training Step: 366... Training loss: 1.8243... 0.1281 sec/batch\n", "Epoch: 1/20... Training Step: 367... Training loss: 1.6731... 0.1288 sec/batch\n", "Epoch: 1/20... Training Step: 368... Training loss: 1.7976... 0.1260 sec/batch\n", "Epoch: 1/20... Training Step: 369... Training loss: 1.7247... 0.1251 sec/batch\n", "Epoch: 1/20... Training Step: 370... Training loss: 1.7413... 0.1244 sec/batch\n", "Epoch: 1/20... Training Step: 371... Training loss: 1.8750... 0.1243 sec/batch\n", "Epoch: 1/20... Training Step: 372... Training loss: 1.8337... 0.1234 sec/batch\n", "Epoch: 1/20... Training Step: 373... Training loss: 1.8835... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 374... Training loss: 2.0701... 0.1277 sec/batch\n", "Epoch: 1/20... Training Step: 375... Training loss: 1.7518... 0.1290 sec/batch\n", "Epoch: 1/20... Training Step: 376... Training loss: 1.8389... 0.1459 sec/batch\n", "Epoch: 1/20... Training Step: 377... Training loss: 1.7932... 0.1210 sec/batch\n", "Epoch: 1/20... Training Step: 378... Training loss: 1.8923... 0.1196 sec/batch\n", "Epoch: 1/20... Training Step: 379... Training loss: 1.6982... 0.1236 sec/batch\n", "Epoch: 1/20... Training Step: 380... Training loss: 1.8352... 0.1249 sec/batch\n", "Epoch: 1/20... Training Step: 381... 
Training loss: 1.7485... 0.1255 sec/batch\n", "Epoch: 1/20... Training Step: 382... Training loss: 1.7305... 0.1200 sec/batch\n", "Epoch: 1/20... Training Step: 383... Training loss: 1.7531... 0.1224 sec/batch\n", "Epoch: 1/20... Training Step: 384... Training loss: 1.7135... 0.1216 sec/batch\n", "Epoch: 1/20... Training Step: 385... Training loss: 1.5366... 0.1196 sec/batch\n", "Epoch: 1/20... Training Step: 386... Training loss: 1.8873... 0.1195 sec/batch\n", "Epoch: 1/20... Training Step: 387... Training loss: 1.8128... 0.1238 sec/batch\n", "Epoch: 1/20... Training Step: 388... Training loss: 1.7778... 0.1235 sec/batch\n", "Epoch: 1/20... Training Step: 389... Training loss: 1.7313... 0.1230 sec/batch\n", "Epoch: 1/20... Training Step: 390... Training loss: 1.7203... 0.1227 sec/batch\n", "Epoch: 1/20... Training Step: 391... Training loss: 1.6142... 0.1233 sec/batch\n", "Epoch: 1/20... Training Step: 392... Training loss: 1.6548... 0.1216 sec/batch\n", "Epoch: 1/20... Training Step: 393... Training loss: 1.6818... 0.1181 sec/batch\n", "Epoch: 1/20... Training Step: 394... Training loss: 1.8411... 0.1219 sec/batch\n", "Epoch: 1/20... Training Step: 395... Training loss: 1.5136... 0.1249 sec/batch\n", "Epoch: 1/20... Training Step: 396... Training loss: 1.8199... 0.1205 sec/batch\n", "Epoch: 1/20... Training Step: 397... Training loss: 1.5027... 0.1245 sec/batch\n", "Epoch: 1/20... Training Step: 398... Training loss: 1.4402... 0.1178 sec/batch\n", "Epoch: 1/20... Training Step: 399... Training loss: 1.6574... 0.1253 sec/batch\n", "Epoch: 1/20... Training Step: 400... Training loss: 1.5007... 0.1219 sec/batch\n", "Epoch: 1/20... Training Step: 401... Training loss: 1.8357... 0.1199 sec/batch\n", "Epoch: 1/20... Training Step: 402... Training loss: 1.7633... 0.1200 sec/batch\n", "Epoch: 1/20... Training Step: 403... Training loss: 1.6854... 0.1191 sec/batch\n", "Epoch: 1/20... Training Step: 404... Training loss: 1.8063... 0.1216 sec/batch\n", "Epoch: 1/20... 
Training Step: 405... Training loss: 1.6896... 0.1205 sec/batch\n", "Epoch: 1/20... Training Step: 406... Training loss: 1.7406... 0.1259 sec/batch\n", "Epoch: 1/20... Training Step: 407... Training loss: 1.8450... 0.1270 sec/batch\n", "Epoch: 1/20... Training Step: 408... Training loss: 1.9894... 0.1220 sec/batch\n", "Epoch: 1/20... Training Step: 409... Training loss: 1.7480... 0.1221 sec/batch\n", "Epoch: 1/20... Training Step: 410... Training loss: 1.7030... 0.1210 sec/batch\n", "Epoch: 1/20... Training Step: 411... Training loss: 1.7146... 0.1219 sec/batch\n", "Epoch: 1/20... Training Step: 412... Training loss: 1.7491... 0.1209 sec/batch\n", "Epoch: 1/20... Training Step: 413... Training loss: 1.6690... 0.1186 sec/batch\n", "Epoch: 1/20... Training Step: 414... Training loss: 1.6483... 0.1254 sec/batch\n", "Epoch: 1/20... Training Step: 415... Training loss: 2.0953... 0.1253 sec/batch\n", "Epoch: 1/20... Training Step: 416... Training loss: 2.0327... 0.1216 sec/batch\n", "Epoch: 1/20... Training Step: 417... Training loss: 1.9521... 0.1294 sec/batch\n", "Epoch: 1/20... Training Step: 418... Training loss: 1.6823... 0.1218 sec/batch\n", "Epoch: 1/20... Training Step: 419... Training loss: 1.8449... 0.1245 sec/batch\n", "Epoch: 1/20... Training Step: 420... Training loss: 1.6649... 0.1266 sec/batch\n", "Epoch: 1/20... Training Step: 421... Training loss: 1.8106... 0.1248 sec/batch\n", "Epoch: 1/20... Training Step: 422... Training loss: 1.6933... 0.1229 sec/batch\n", "Epoch: 1/20... Training Step: 423... Training loss: 1.9252... 0.1264 sec/batch\n", "Epoch: 1/20... Training Step: 424... Training loss: 1.9893... 0.1250 sec/batch\n", "Epoch: 1/20... Training Step: 425... Training loss: 1.7732... 0.1247 sec/batch\n", "Epoch: 1/20... Training Step: 426... Training loss: 1.7674... 0.1239 sec/batch\n", "Epoch: 1/20... Training Step: 427... Training loss: 1.6535... 0.1268 sec/batch\n", "Epoch: 1/20... Training Step: 428... Training loss: 2.0166... 
0.1194 sec/batch\n", "Epoch: 1/20... Training Step: 429... Training loss: 1.6403... 0.1226 sec/batch\n", "Epoch: 1/20... Training Step: 430... Training loss: 1.7100... 0.1224 sec/batch\n", "Epoch: 1/20... Training Step: 431... Training loss: 1.9968... 0.1201 sec/batch\n", "Epoch: 1/20... Training Step: 432... Training loss: 1.7512... 0.1216 sec/batch\n", "Epoch: 1/20... Training Step: 433... Training loss: 1.9491... 0.1228 sec/batch\n", "Epoch: 1/20... Training Step: 434... Training loss: 1.9198... 0.1382 sec/batch\n", "Epoch: 1/20... Training Step: 435... Training loss: 1.6259... 0.1230 sec/batch\n", "Epoch: 1/20... Training Step: 436... Training loss: 1.8238... 0.1213 sec/batch\n", "Epoch: 1/20... Training Step: 437... Training loss: 1.7599... 0.1218 sec/batch\n", "Epoch: 1/20... Training Step: 438... Training loss: 1.8576... 0.1255 sec/batch\n", "Epoch: 1/20... Training Step: 439... Training loss: 1.9367... 0.1207 sec/batch\n", "Epoch: 1/20... Training Step: 440... Training loss: 2.1496... 0.1303 sec/batch\n", "Epoch: 1/20... Training Step: 441... Training loss: 1.9533... 0.1361 sec/batch\n", "Epoch: 1/20... Training Step: 442... Training loss: 1.7783... 0.1323 sec/batch\n", "Epoch: 1/20... Training Step: 443... Training loss: 1.8428... 0.1197 sec/batch\n", "Epoch: 1/20... Training Step: 444... Training loss: 1.7003... 0.1290 sec/batch\n", "Epoch: 1/20... Training Step: 445... Training loss: 1.7882... 0.1272 sec/batch\n", "Epoch: 1/20... Training Step: 446... Training loss: 1.7898... 0.1245 sec/batch\n", "Epoch: 1/20... Training Step: 447... Training loss: 1.7436... 0.1284 sec/batch\n", "Epoch: 1/20... Training Step: 448... Training loss: 1.6811... 0.1222 sec/batch\n", "Epoch: 1/20... Training Step: 449... Training loss: 1.7512... 0.1249 sec/batch\n", "Epoch: 1/20... Training Step: 450... Training loss: 1.7195... 0.1251 sec/batch\n", "Epoch: 1/20... Training Step: 451... Training loss: 1.8251... 0.1198 sec/batch\n", "Epoch: 1/20... Training Step: 452... 
Training loss: 1.7388... 0.1209 sec/batch\n", "Epoch: 1/20... Training Step: 453... Training loss: 1.6635... 0.1208 sec/batch\n", "Epoch: 1/20... Training Step: 454... Training loss: 2.0937... 0.1227 sec/batch\n", "Epoch: 1/20... Training Step: 455... Training loss: 1.8394... 0.1225 sec/batch\n", "Epoch: 1/20... Training Step: 456... Training loss: 1.6080... 0.1203 sec/batch\n", "Epoch: 1/20... Training Step: 457... Training loss: 1.7142... 0.1253 sec/batch\n", "Epoch: 1/20... Training Step: 458... Training loss: 1.7019... 0.1227 sec/batch\n", "Epoch: 1/20... Training Step: 459... Training loss: 1.5507... 0.1222 sec/batch\n", "Epoch: 1/20... Training Step: 460... Training loss: 1.7918... 0.1226 sec/batch\n", "Epoch: 1/20... Training Step: 461... Training loss: 1.9374... 0.1258 sec/batch\n", "Epoch: 1/20... Training Step: 462... Training loss: 1.7728... 0.1187 sec/batch\n", "Epoch: 1/20... Training Step: 463... Training loss: 1.7676... 0.1206 sec/batch\n", "Epoch: 1/20... Training Step: 464... Training loss: 1.5364... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 465... Training loss: 2.1169... 0.1199 sec/batch\n", "Epoch: 2/20... Training Step: 466... Training loss: 1.8043... 0.1209 sec/batch\n", "Epoch: 2/20... Training Step: 467... Training loss: 1.7437... 0.1196 sec/batch\n", "Epoch: 2/20... Training Step: 468... Training loss: 1.6801... 0.1214 sec/batch\n", "Epoch: 2/20... Training Step: 469... Training loss: 2.0278... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 470... Training loss: 1.5746... 0.1217 sec/batch\n", "Epoch: 2/20... Training Step: 471... Training loss: 1.8345... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 472... Training loss: 1.6416... 0.1232 sec/batch\n", "Epoch: 2/20... Training Step: 473... Training loss: 1.7075... 0.1241 sec/batch\n", "Epoch: 2/20... Training Step: 474... Training loss: 1.9914... 0.1202 sec/batch\n", "Epoch: 2/20... Training Step: 475... Training loss: 1.5417... 0.1212 sec/batch\n", "Epoch: 2/20... 
Training Step: 476... Training loss: 1.5214... 0.1212 sec/batch\n", "Epoch: 2/20... Training Step: 477... Training loss: 1.8553... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 478... Training loss: 1.5335... 0.1228 sec/batch\n", "Epoch: 2/20... Training Step: 479... Training loss: 1.8862... 0.1184 sec/batch\n", "Epoch: 2/20... Training Step: 480... Training loss: 1.7181... 0.1239 sec/batch\n", "Epoch: 2/20... Training Step: 481... Training loss: 1.4988... 0.1216 sec/batch\n", "Epoch: 2/20... Training Step: 482... Training loss: 1.5411... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 483... Training loss: 1.6608... 0.1201 sec/batch\n", "Epoch: 2/20... Training Step: 484... Training loss: 1.4726... 0.1181 sec/batch\n", "Epoch: 2/20... Training Step: 485... Training loss: 1.6968... 0.1271 sec/batch\n", "Epoch: 2/20... Training Step: 486... Training loss: 1.5778... 0.1228 sec/batch\n", "Epoch: 2/20... Training Step: 487... Training loss: 1.9038... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 488... Training loss: 1.5238... 0.1251 sec/batch\n", "Epoch: 2/20... Training Step: 489... Training loss: 1.6305... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 490... Training loss: 1.6736... 0.1207 sec/batch\n", "Epoch: 2/20... Training Step: 491... Training loss: 1.7360... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 492... Training loss: 1.5628... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 493... Training loss: 1.6163... 0.1211 sec/batch\n", "Epoch: 2/20... Training Step: 494... Training loss: 1.6658... 0.1230 sec/batch\n", "Epoch: 2/20... Training Step: 495... Training loss: 1.4685... 0.1184 sec/batch\n", "Epoch: 2/20... Training Step: 496... Training loss: 1.6188... 0.1200 sec/batch\n", "Epoch: 2/20... Training Step: 497... Training loss: 1.4422... 0.1180 sec/batch\n", "Epoch: 2/20... Training Step: 498... Training loss: 1.5989... 0.1241 sec/batch\n", "Epoch: 2/20... Training Step: 499... Training loss: 1.6538... 
0.1219 sec/batch\n", "Epoch: 2/20... Training Step: 500... Training loss: 1.5559... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 501... Training loss: 1.6995... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 502... Training loss: 1.5303... 0.1238 sec/batch\n", "Epoch: 2/20... Training Step: 503... Training loss: 1.5401... 0.1172 sec/batch\n", "Epoch: 2/20... Training Step: 504... Training loss: 1.8869... 0.1249 sec/batch\n", "Epoch: 2/20... Training Step: 505... Training loss: 1.5657... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 506... Training loss: 1.5236... 0.1210 sec/batch\n", "Epoch: 2/20... Training Step: 507... Training loss: 1.8203... 0.1213 sec/batch\n", "Epoch: 2/20... Training Step: 508... Training loss: 1.4910... 0.1218 sec/batch\n", "Epoch: 2/20... Training Step: 509... Training loss: 1.6385... 0.1188 sec/batch\n", "Epoch: 2/20... Training Step: 510... Training loss: 1.5416... 0.1244 sec/batch\n", "Epoch: 2/20... Training Step: 511... Training loss: 1.4825... 0.1223 sec/batch\n", "Epoch: 2/20... Training Step: 512... Training loss: 1.8408... 0.1237 sec/batch\n", "Epoch: 2/20... Training Step: 513... Training loss: 1.5825... 0.1248 sec/batch\n", "Epoch: 2/20... Training Step: 514... Training loss: 1.8381... 0.1233 sec/batch\n", "Epoch: 2/20... Training Step: 515... Training loss: 1.6678... 0.1225 sec/batch\n", "Epoch: 2/20... Training Step: 516... Training loss: 1.8468... 0.1225 sec/batch\n", "Epoch: 2/20... Training Step: 517... Training loss: 1.7648... 0.1205 sec/batch\n", "Epoch: 2/20... Training Step: 518... Training loss: 1.6612... 0.1214 sec/batch\n", "Epoch: 2/20... Training Step: 519... Training loss: 1.5334... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 520... Training loss: 1.6670... 0.1264 sec/batch\n", "Epoch: 2/20... Training Step: 521... Training loss: 1.8123... 0.1248 sec/batch\n", "Epoch: 2/20... Training Step: 522... Training loss: 1.8160... 0.1246 sec/batch\n", "Epoch: 2/20... Training Step: 523... 
Training loss: 1.6110... 0.1191 sec/batch\n", "Epoch: 2/20... Training Step: 524... Training loss: 1.6792... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 525... Training loss: 1.6498... 0.1249 sec/batch\n", "Epoch: 2/20... Training Step: 526... Training loss: 1.8336... 0.1202 sec/batch\n", "Epoch: 2/20... Training Step: 527... Training loss: 1.5916... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 528... Training loss: 1.7026... 0.1227 sec/batch\n", "Epoch: 2/20... Training Step: 529... Training loss: 1.4911... 0.1241 sec/batch\n", "Epoch: 2/20... Training Step: 530... Training loss: 1.6888... 0.1217 sec/batch\n", "Epoch: 2/20... Training Step: 531... Training loss: 1.6631... 0.1197 sec/batch\n", "Epoch: 2/20... Training Step: 532... Training loss: 1.7223... 0.1233 sec/batch\n", "Epoch: 2/20... Training Step: 533... Training loss: 1.6294... 0.1231 sec/batch\n", "Epoch: 2/20... Training Step: 534... Training loss: 1.6289... 0.1279 sec/batch\n", "Epoch: 2/20... Training Step: 535... Training loss: 1.7408... 0.1231 sec/batch\n", "Epoch: 2/20... Training Step: 536... Training loss: 1.8541... 0.1194 sec/batch\n", "Epoch: 2/20... Training Step: 537... Training loss: 1.6901... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 538... Training loss: 1.5052... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 539... Training loss: 1.9474... 0.1161 sec/batch\n", "Epoch: 2/20... Training Step: 540... Training loss: 1.5172... 0.1202 sec/batch\n", "Epoch: 2/20... Training Step: 541... Training loss: 1.4782... 0.1203 sec/batch\n", "Epoch: 2/20... Training Step: 542... Training loss: 1.7421... 0.1222 sec/batch\n", "Epoch: 2/20... Training Step: 543... Training loss: 1.7581... 0.1230 sec/batch\n", "Epoch: 2/20... Training Step: 544... Training loss: 1.5988... 0.1178 sec/batch\n", "Epoch: 2/20... Training Step: 545... Training loss: 1.6857... 0.1171 sec/batch\n", "Epoch: 2/20... Training Step: 546... Training loss: 1.7556... 0.1237 sec/batch\n", "Epoch: 2/20... 
Training Step: 547... Training loss: 1.5255... 0.1232 sec/batch\n", "Epoch: 2/20... Training Step: 548... Training loss: 1.7376... 0.1200 sec/batch\n", "Epoch: 2/20... Training Step: 549... Training loss: 1.6665... 0.1200 sec/batch\n", "Epoch: 2/20... Training Step: 550... Training loss: 1.5863... 0.1202 sec/batch\n", "Epoch: 2/20... Training Step: 551... Training loss: 1.5182... 0.1184 sec/batch\n", "Epoch: 2/20... Training Step: 552... Training loss: 1.7099... 0.1178 sec/batch\n", "Epoch: 2/20... Training Step: 553... Training loss: 1.9698... 0.1189 sec/batch\n", "Epoch: 2/20... Training Step: 554... Training loss: 1.5779... 0.1222 sec/batch\n", "Epoch: 2/20... Training Step: 555... Training loss: 1.7203... 0.1233 sec/batch\n", "Epoch: 2/20... Training Step: 556... Training loss: 1.8608... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 557... Training loss: 1.4067... 0.1226 sec/batch\n", "Epoch: 2/20... Training Step: 558... Training loss: 1.8412... 0.1214 sec/batch\n", "Epoch: 2/20... Training Step: 559... Training loss: 1.4484... 0.1245 sec/batch\n", "Epoch: 2/20... Training Step: 560... Training loss: 1.7325... 0.1258 sec/batch\n", "Epoch: 2/20... Training Step: 561... Training loss: 2.0178... 0.1231 sec/batch\n", "Epoch: 2/20... Training Step: 562... Training loss: 1.7388... 0.1219 sec/batch\n", "Epoch: 2/20... Training Step: 563... Training loss: 1.7662... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 564... Training loss: 1.5795... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 565... Training loss: 1.6619... 0.1283 sec/batch\n", "Epoch: 2/20... Training Step: 566... Training loss: 1.8696... 0.1230 sec/batch\n", "Epoch: 2/20... Training Step: 567... Training loss: 1.9462... 0.1194 sec/batch\n", "Epoch: 2/20... Training Step: 568... Training loss: 1.6107... 0.1226 sec/batch\n", "Epoch: 2/20... Training Step: 569... Training loss: 1.8654... 0.1231 sec/batch\n", "Epoch: 2/20... Training Step: 570... Training loss: 2.0711... 
0.1209 sec/batch\n", "Epoch: 2/20... Training Step: 571... Training loss: 1.9632... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 572... Training loss: 2.0685... 0.1212 sec/batch\n", "Epoch: 2/20... Training Step: 573... Training loss: 1.9684... 0.1233 sec/batch\n", "Epoch: 2/20... Training Step: 574... Training loss: 1.8868... 0.1275 sec/batch\n", "Epoch: 2/20... Training Step: 575... Training loss: 1.8931... 0.1193 sec/batch\n", "Epoch: 2/20... Training Step: 576... Training loss: 1.8381... 0.1197 sec/batch\n", "Epoch: 2/20... Training Step: 577... Training loss: 1.8021... 0.1183 sec/batch\n", "Epoch: 2/20... Training Step: 578... Training loss: 1.9651... 0.1182 sec/batch\n", "Epoch: 2/20... Training Step: 579... Training loss: 1.8136... 0.1184 sec/batch\n", "Epoch: 2/20... Training Step: 580... Training loss: 1.6910... 0.1174 sec/batch\n", "Epoch: 2/20... Training Step: 581... Training loss: 1.9931... 0.1183 sec/batch\n", "Epoch: 2/20... Training Step: 582... Training loss: 2.0034... 0.1180 sec/batch\n", "Epoch: 2/20... Training Step: 583... Training loss: 1.7529... 0.1211 sec/batch\n", "Epoch: 2/20... Training Step: 584... Training loss: 1.6017... 0.1224 sec/batch\n", "Epoch: 2/20... Training Step: 585... Training loss: 1.7148... 0.1195 sec/batch\n", "Epoch: 2/20... Training Step: 586... Training loss: 1.7608... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 587... Training loss: 1.9276... 0.1203 sec/batch\n", "Epoch: 2/20... Training Step: 588... Training loss: 1.9657... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 589... Training loss: 1.9765... 0.1337 sec/batch\n", "Epoch: 2/20... Training Step: 590... Training loss: 1.6106... 0.1296 sec/batch\n", "Epoch: 2/20... Training Step: 591... Training loss: 1.6972... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 592... Training loss: 1.6956... 0.1212 sec/batch\n", "Epoch: 2/20... Training Step: 593... Training loss: 1.8264... 0.1205 sec/batch\n", "Epoch: 2/20... Training Step: 594... 
Training loss: 1.7785... 0.1188 sec/batch\n", "Epoch: 2/20... Training Step: 595... Training loss: 2.0840... 0.1218 sec/batch\n", "Epoch: 2/20... Training Step: 596... Training loss: 1.8626... 0.1222 sec/batch\n", "Epoch: 2/20... Training Step: 597... Training loss: 1.6291... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 598... Training loss: 1.7861... 0.1194 sec/batch\n", "Epoch: 2/20... Training Step: 599... Training loss: 1.6117... 0.1248 sec/batch\n", "Epoch: 2/20... Training Step: 600... Training loss: 1.5718... 0.1232 sec/batch\n", "Epoch: 2/20... Training Step: 601... Training loss: 1.6199... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 602... Training loss: 1.7399... 0.1192 sec/batch\n", "Epoch: 2/20... Training Step: 603... Training loss: 1.5251... 0.1201 sec/batch\n", "Epoch: 2/20... Training Step: 604... Training loss: 1.6950... 0.1233 sec/batch\n", "Epoch: 2/20... Training Step: 605... Training loss: 1.7904... 0.1260 sec/batch\n", "Epoch: 2/20... Training Step: 606... Training loss: 1.7237... 0.1219 sec/batch\n", "Epoch: 2/20... Training Step: 607... Training loss: 1.5295... 0.1212 sec/batch\n", "Epoch: 2/20... Training Step: 608... Training loss: 1.6737... 0.1216 sec/batch\n", "Epoch: 2/20... Training Step: 609... Training loss: 1.7418... 0.1224 sec/batch\n", "Epoch: 2/20... Training Step: 610... Training loss: 1.6588... 0.1210 sec/batch\n", "Epoch: 2/20... Training Step: 611... Training loss: 1.9643... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 612... Training loss: 1.6860... 0.1183 sec/batch\n", "Epoch: 2/20... Training Step: 613... Training loss: 1.7044... 0.1217 sec/batch\n", "Epoch: 2/20... Training Step: 614... Training loss: 1.7717... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 615... Training loss: 1.9744... 0.1204 sec/batch\n", "Epoch: 2/20... Training Step: 616... Training loss: 1.8483... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 617... Training loss: 1.8924... 0.1230 sec/batch\n", "Epoch: 2/20... 
Training Step: 618... Training loss: 1.8024... 0.1223 sec/batch\n", "Epoch: 2/20... Training Step: 619... Training loss: 1.8520... 0.1198 sec/batch\n", "Epoch: 2/20... Training Step: 620... Training loss: 1.5388... 0.1195 sec/batch\n", "Epoch: 2/20... Training Step: 621... Training loss: 1.5904... 0.1208 sec/batch\n", "Epoch: 2/20... Training Step: 622... Training loss: 1.7615... 0.1214 sec/batch\n", "Epoch: 2/20... Training Step: 623... Training loss: 1.6085... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 624... Training loss: 1.6241... 0.1237 sec/batch\n", "Epoch: 2/20... Training Step: 625... Training loss: 1.8298... 0.1250 sec/batch\n", "Epoch: 2/20... Training Step: 626... Training loss: 1.6570... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 627... Training loss: 1.7788... 0.1267 sec/batch\n", "Epoch: 2/20... Training Step: 628... Training loss: 1.4951... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 629... Training loss: 1.6211... 0.1262 sec/batch\n", "Epoch: 2/20... Training Step: 630... Training loss: 1.5436... 0.1209 sec/batch\n", "Epoch: 2/20... Training Step: 631... Training loss: 1.7008... 0.1217 sec/batch\n", "Epoch: 2/20... Training Step: 632... Training loss: 1.8135... 0.1212 sec/batch\n", "Epoch: 2/20... Training Step: 633... Training loss: 1.8089... 0.1251 sec/batch\n", "Epoch: 2/20... Training Step: 634... Training loss: 1.8158... 0.1202 sec/batch\n", "Epoch: 2/20... Training Step: 635... Training loss: 1.7606... 0.1213 sec/batch\n", "Epoch: 2/20... Training Step: 636... Training loss: 1.6653... 0.1216 sec/batch\n", "Epoch: 2/20... Training Step: 637... Training loss: 1.8007... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 638... Training loss: 1.7540... 0.1243 sec/batch\n", "Epoch: 2/20... Training Step: 639... Training loss: 1.7894... 0.1247 sec/batch\n", "Epoch: 2/20... Training Step: 640... Training loss: 1.4989... 0.1209 sec/batch\n", "Epoch: 2/20... Training Step: 641... Training loss: 1.5423... 
0.1193 sec/batch\n", "Epoch: 2/20... Training Step: 642... Training loss: 1.8979... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 643... Training loss: 1.6102... 0.1244 sec/batch\n", "Epoch: 2/20... Training Step: 644... Training loss: 1.8565... 0.1245 sec/batch\n", "Epoch: 2/20... Training Step: 645... Training loss: 1.5797... 0.1293 sec/batch\n", "Epoch: 2/20... Training Step: 646... Training loss: 1.8037... 0.1279 sec/batch\n", "Epoch: 2/20... Training Step: 647... Training loss: 1.7730... 0.1312 sec/batch\n", "Epoch: 2/20... Training Step: 648... Training loss: 1.7959... 0.1239 sec/batch\n", "Epoch: 2/20... Training Step: 649... Training loss: 1.8847... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 650... Training loss: 1.6873... 0.1232 sec/batch\n", "Epoch: 2/20... Training Step: 651... Training loss: 1.8867... 0.1251 sec/batch\n", "Epoch: 2/20... Training Step: 652... Training loss: 1.5452... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 653... Training loss: 1.7488... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 654... Training loss: 1.6784... 0.1245 sec/batch\n", "Epoch: 2/20... Training Step: 655... Training loss: 1.4720... 0.1264 sec/batch\n", "Epoch: 2/20... Training Step: 656... Training loss: 1.8478... 0.1243 sec/batch\n", "Epoch: 2/20... Training Step: 657... Training loss: 1.7053... 0.1222 sec/batch\n", "Epoch: 2/20... Training Step: 658... Training loss: 1.7101... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 659... Training loss: 1.8143... 0.1210 sec/batch\n", "Epoch: 2/20... Training Step: 660... Training loss: 1.7087... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 661... Training loss: 1.4732... 0.1224 sec/batch\n", "Epoch: 2/20... Training Step: 662... Training loss: 1.7075... 0.1230 sec/batch\n", "Epoch: 2/20... Training Step: 663... Training loss: 1.5147... 0.1248 sec/batch\n", "Epoch: 2/20... Training Step: 664... Training loss: 1.5947... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 665... 
Training loss: 1.7045... 0.1227 sec/batch\n", "Epoch: 2/20... Training Step: 666... Training loss: 1.6102... 0.1226 sec/batch\n", "Epoch: 2/20... Training Step: 667... Training loss: 1.5347... 0.1256 sec/batch\n", "Epoch: 2/20... Training Step: 668... Training loss: 1.7791... 0.1275 sec/batch\n", "Epoch: 2/20... Training Step: 669... Training loss: 1.6174... 0.1349 sec/batch\n", "Epoch: 2/20... Training Step: 670... Training loss: 1.5101... 0.1356 sec/batch\n", "Epoch: 2/20... Training Step: 671... Training loss: 1.5001... 0.1285 sec/batch\n", "Epoch: 2/20... Training Step: 672... Training loss: 1.7598... 0.1323 sec/batch\n", "Epoch: 2/20... Training Step: 673... Training loss: 1.5729... 0.1286 sec/batch\n", "Epoch: 2/20... Training Step: 674... Training loss: 1.5274... 0.1186 sec/batch\n", "Epoch: 2/20... Training Step: 675... Training loss: 1.3030... 0.1212 sec/batch\n", "Epoch: 2/20... Training Step: 676... Training loss: 1.7295... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 677... Training loss: 1.7557... 0.1276 sec/batch\n", "Epoch: 2/20... Training Step: 678... Training loss: 1.5915... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 679... Training loss: 1.7897... 0.1793 sec/batch\n", "Epoch: 2/20... Training Step: 680... Training loss: 1.4899... 0.1643 sec/batch\n", "Epoch: 2/20... Training Step: 681... Training loss: 1.6719... 0.1357 sec/batch\n", "Epoch: 2/20... Training Step: 682... Training loss: 1.6153... 0.1455 sec/batch\n", "Epoch: 2/20... Training Step: 683... Training loss: 1.7885... 0.1401 sec/batch\n", "Epoch: 2/20... Training Step: 684... Training loss: 1.6302... 0.1201 sec/batch\n", "Epoch: 2/20... Training Step: 685... Training loss: 1.5734... 0.1197 sec/batch\n", "Epoch: 2/20... Training Step: 686... Training loss: 1.8202... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 687... Training loss: 1.8504... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 688... Training loss: 1.9010... 0.1262 sec/batch\n", "Epoch: 2/20... 
Training Step: 689... Training loss: 1.6481... 0.1198 sec/batch\n", "Epoch: 2/20... Training Step: 690... Training loss: 1.8181... 0.1224 sec/batch\n", "Epoch: 2/20... Training Step: 691... Training loss: 1.8177... 0.1186 sec/batch\n", "Epoch: 2/20... Training Step: 692... Training loss: 1.5322... 0.1246 sec/batch\n", "Epoch: 2/20... Training Step: 693... Training loss: 1.5682... 0.1203 sec/batch\n", "Epoch: 2/20... Training Step: 694... Training loss: 1.6734... 0.1195 sec/batch\n", "Epoch: 2/20... Training Step: 695... Training loss: 1.5798... 0.1223 sec/batch\n", "Epoch: 2/20... Training Step: 696... Training loss: 1.5021... 0.1162 sec/batch\n", "Epoch: 2/20... Training Step: 697... Training loss: 1.9915... 0.1208 sec/batch\n", "Epoch: 2/20... Training Step: 698... Training loss: 1.6299... 0.1180 sec/batch\n", "Epoch: 2/20... Training Step: 699... Training loss: 1.8016... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 700... Training loss: 1.5611... 0.1258 sec/batch\n", "Epoch: 2/20... Training Step: 701... Training loss: 2.0087... 0.1231 sec/batch\n", "Epoch: 2/20... Training Step: 702... Training loss: 1.5452... 0.1183 sec/batch\n", "Epoch: 2/20... Training Step: 703... Training loss: 1.6698... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 704... Training loss: 1.8368... 0.1209 sec/batch\n", "Epoch: 2/20... Training Step: 705... Training loss: 1.6080... 0.1225 sec/batch\n", "Epoch: 2/20... Training Step: 706... Training loss: 1.6187... 0.1137 sec/batch\n", "Epoch: 2/20... Training Step: 707... Training loss: 1.9185... 0.1197 sec/batch\n", "Epoch: 2/20... Training Step: 708... Training loss: 1.6918... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 709... Training loss: 1.6527... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 710... Training loss: 1.3810... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 711... Training loss: 1.5839... 0.1228 sec/batch\n", "Epoch: 2/20... Training Step: 712... Training loss: 1.7290... 
0.1217 sec/batch\n", "Epoch: 2/20... Training Step: 713... Training loss: 1.6177... 0.1224 sec/batch\n", "Epoch: 2/20... Training Step: 714... Training loss: 1.5815... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 715... Training loss: 1.7203... 0.1201 sec/batch\n", "Epoch: 2/20... Training Step: 716... Training loss: 1.6022... 0.1257 sec/batch\n", "Epoch: 2/20... Training Step: 717... Training loss: 1.4405... 0.1269 sec/batch\n", "Epoch: 2/20... Training Step: 718... Training loss: 1.7779... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 719... Training loss: 1.6135... 0.1228 sec/batch\n", "Epoch: 2/20... Training Step: 720... Training loss: 1.6352... 0.1187 sec/batch\n", "Epoch: 2/20... Training Step: 721... Training loss: 1.7968... 0.1210 sec/batch\n", "Epoch: 2/20... Training Step: 722... Training loss: 1.5118... 0.1202 sec/batch\n", "Epoch: 2/20... Training Step: 723... Training loss: 1.6732... 0.1296 sec/batch\n", "Epoch: 2/20... Training Step: 724... Training loss: 1.6489... 0.1237 sec/batch\n", "Epoch: 2/20... Training Step: 725... Training loss: 1.8273... 0.1233 sec/batch\n", "Epoch: 2/20... Training Step: 726... Training loss: 1.6901... 0.1188 sec/batch\n", "Epoch: 2/20... Training Step: 727... Training loss: 1.6478... 0.1249 sec/batch\n", "Epoch: 2/20... Training Step: 728... Training loss: 1.8091... 0.1242 sec/batch\n", "Epoch: 2/20... Training Step: 729... Training loss: 1.7358... 0.1222 sec/batch\n", "Epoch: 2/20... Training Step: 730... Training loss: 1.6705... 0.1199 sec/batch\n", "Epoch: 2/20... Training Step: 731... Training loss: 1.7879... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 732... Training loss: 1.8142... 0.1214 sec/batch\n", "Epoch: 2/20... Training Step: 733... Training loss: 1.7720... 0.1208 sec/batch\n", "Epoch: 2/20... Training Step: 734... Training loss: 1.8787... 0.1196 sec/batch\n", "Epoch: 2/20... Training Step: 735... Training loss: 1.6196... 0.1184 sec/batch\n", "Epoch: 2/20... Training Step: 736... 
Training loss: 1.7935... 0.1209 sec/batch\n", "Epoch: 2/20... Training Step: 737... Training loss: 1.7010... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 738... Training loss: 1.5914... 0.1216 sec/batch\n", "Epoch: 2/20... Training Step: 739... Training loss: 1.7384... 0.1179 sec/batch\n", "Epoch: 2/20... Training Step: 740... Training loss: 1.6520... 0.1200 sec/batch\n", "Epoch: 2/20... Training Step: 741... Training loss: 1.6665... 0.1246 sec/batch\n", "Epoch: 2/20... Training Step: 742... Training loss: 1.9144... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 743... Training loss: 1.4846... 0.1192 sec/batch\n", "Epoch: 2/20... Training Step: 744... Training loss: 1.5296... 0.1244 sec/batch\n", "Epoch: 2/20... Training Step: 745... Training loss: 1.5161... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 746... Training loss: 1.5085... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 747... Training loss: 1.5750... 0.1233 sec/batch\n", "Epoch: 2/20... Training Step: 748... Training loss: 1.5892... 0.1219 sec/batch\n", "Epoch: 2/20... Training Step: 749... Training loss: 1.4467... 0.1194 sec/batch\n", "Epoch: 2/20... Training Step: 750... Training loss: 1.5899... 0.1163 sec/batch\n", "Epoch: 2/20... Training Step: 751... Training loss: 1.6438... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 752... Training loss: 1.7068... 0.1321 sec/batch\n", "Epoch: 2/20... Training Step: 753... Training loss: 1.8248... 0.1280 sec/batch\n", "Epoch: 2/20... Training Step: 754... Training loss: 1.7047... 0.1251 sec/batch\n", "Epoch: 2/20... Training Step: 755... Training loss: 1.6354... 0.1208 sec/batch\n", "Epoch: 2/20... Training Step: 756... Training loss: 1.5990... 0.1218 sec/batch\n", "Epoch: 2/20... Training Step: 757... Training loss: 1.5000... 0.1219 sec/batch\n", "Epoch: 2/20... Training Step: 758... Training loss: 1.5278... 0.1247 sec/batch\n", "Epoch: 2/20... Training Step: 759... Training loss: 1.5278... 0.1251 sec/batch\n", "Epoch: 2/20... 
Training Step: 760... Training loss: 1.8009... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 761... Training loss: 1.5023... 0.1196 sec/batch\n", "Epoch: 2/20... Training Step: 762... Training loss: 1.6568... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 763... Training loss: 1.6426... 0.1202 sec/batch\n", "Epoch: 2/20... Training Step: 764... Training loss: 1.6477... 0.1237 sec/batch\n", "Epoch: 2/20... Training Step: 765... Training loss: 1.5885... 0.1231 sec/batch\n", "Epoch: 2/20... Training Step: 766... Training loss: 1.6078... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 767... Training loss: 1.4139... 0.1251 sec/batch\n", "Epoch: 2/20... Training Step: 768... Training loss: 1.8775... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 769... Training loss: 1.5152... 0.1250 sec/batch\n", "Epoch: 2/20... Training Step: 770... Training loss: 1.8197... 0.1175 sec/batch\n", "Epoch: 2/20... Training Step: 771... Training loss: 1.6091... 0.1251 sec/batch\n", "Epoch: 2/20... Training Step: 772... Training loss: 1.9504... 0.1265 sec/batch\n", "Epoch: 2/20... Training Step: 773... Training loss: 1.6483... 0.1250 sec/batch\n", "Epoch: 2/20... Training Step: 774... Training loss: 1.6440... 0.1169 sec/batch\n", "Epoch: 2/20... Training Step: 775... Training loss: 1.6952... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 776... Training loss: 1.6463... 0.1295 sec/batch\n", "Epoch: 2/20... Training Step: 777... Training loss: 1.6828... 0.1209 sec/batch\n", "Epoch: 2/20... Training Step: 778... Training loss: 1.6365... 0.1232 sec/batch\n", "Epoch: 2/20... Training Step: 779... Training loss: 1.4231... 0.1188 sec/batch\n", "Epoch: 2/20... Training Step: 780... Training loss: 1.5930... 0.1212 sec/batch\n", "Epoch: 2/20... Training Step: 781... Training loss: 1.6856... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 782... Training loss: 1.4957... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 783... Training loss: 1.6320... 
0.1189 sec/batch\n", "Epoch: 2/20... Training Step: 784... Training loss: 1.5432... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 785... Training loss: 1.4257... 0.1232 sec/batch\n", "Epoch: 2/20... Training Step: 786... Training loss: 1.7358... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 787... Training loss: 1.4737... 0.1219 sec/batch\n", "Epoch: 2/20... Training Step: 788... Training loss: 1.4110... 0.1243 sec/batch\n", "Epoch: 2/20... Training Step: 789... Training loss: 1.4146... 0.1178 sec/batch\n", "Epoch: 2/20... Training Step: 790... Training loss: 1.5268... 0.1191 sec/batch\n", "Epoch: 2/20... Training Step: 791... Training loss: 1.6573... 0.1208 sec/batch\n", "Epoch: 2/20... Training Step: 792... Training loss: 1.4660... 0.1230 sec/batch\n", "Epoch: 2/20... Training Step: 793... Training loss: 1.6335... 0.1257 sec/batch\n", "Epoch: 2/20... Training Step: 794... Training loss: 1.4822... 0.1194 sec/batch\n", "Epoch: 2/20... Training Step: 795... Training loss: 1.5909... 0.1238 sec/batch\n", "Epoch: 2/20... Training Step: 796... Training loss: 1.5352... 0.1192 sec/batch\n", "Epoch: 2/20... Training Step: 797... Training loss: 1.4827... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 798... Training loss: 1.5295... 0.1216 sec/batch\n", "Epoch: 2/20... Training Step: 799... Training loss: 1.8338... 0.1261 sec/batch\n", "Epoch: 2/20... Training Step: 800... Training loss: 1.6561... 0.1217 sec/batch\n", "Epoch: 2/20... Training Step: 801... Training loss: 1.6718... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 802... Training loss: 1.6799... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 803... Training loss: 1.6305... 0.1195 sec/batch\n", "Epoch: 2/20... Training Step: 804... Training loss: 1.6059... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 805... Training loss: 1.5072... 0.1228 sec/batch\n", "Epoch: 2/20... Training Step: 806... Training loss: 1.5934... 0.1245 sec/batch\n", "Epoch: 2/20... Training Step: 807... 
Training loss: 1.4723... 0.1217 sec/batch\n", "Epoch: 2/20... Training Step: 808... Training loss: 1.8214... 0.1237 sec/batch\n", "Epoch: 2/20... Training Step: 809... Training loss: 1.5360... 0.1225 sec/batch\n", "Epoch: 2/20... Training Step: 810... Training loss: 1.4941... 0.1201 sec/batch\n", "Epoch: 2/20... Training Step: 811... Training loss: 1.5319... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 812... Training loss: 1.8912... 0.1190 sec/batch\n", "Epoch: 2/20... Training Step: 813... Training loss: 1.5442... 0.1204 sec/batch\n", "Epoch: 2/20... Training Step: 814... Training loss: 1.5836... 0.1218 sec/batch\n", "Epoch: 2/20... Training Step: 815... Training loss: 1.4504... 0.1245 sec/batch\n", "Epoch: 2/20... Training Step: 816... Training loss: 1.4963... 0.1211 sec/batch\n", "Epoch: 2/20... Training Step: 817... Training loss: 1.3978... 0.1253 sec/batch\n", "Epoch: 2/20... Training Step: 818... Training loss: 1.2765... 0.1211 sec/batch\n", "Epoch: 2/20... Training Step: 819... Training loss: 1.8025... 0.1184 sec/batch\n", "Epoch: 2/20... Training Step: 820... Training loss: 1.4530... 0.1242 sec/batch\n", "Epoch: 2/20... Training Step: 821... Training loss: 1.4771... 0.1208 sec/batch\n", "Epoch: 2/20... Training Step: 822... Training loss: 1.6415... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 823... Training loss: 1.5389... 0.1283 sec/batch\n", "Epoch: 2/20... Training Step: 824... Training loss: 1.3361... 0.1234 sec/batch\n", "Epoch: 2/20... Training Step: 825... Training loss: 1.7247... 0.1187 sec/batch\n", "Epoch: 2/20... Training Step: 826... Training loss: 1.5560... 0.1225 sec/batch\n", "Epoch: 2/20... Training Step: 827... Training loss: 1.4898... 0.1238 sec/batch\n", "Epoch: 2/20... Training Step: 828... Training loss: 1.5243... 0.1196 sec/batch\n", "Epoch: 2/20... Training Step: 829... Training loss: 1.4751... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 830... Training loss: 1.5885... 0.1209 sec/batch\n", "Epoch: 2/20... 
Training Step: 831... Training loss: 1.3582... 0.1225 sec/batch\n", "Epoch: 2/20... Training Step: 832... Training loss: 1.6574... 0.1239 sec/batch\n", "Epoch: 2/20... Training Step: 833... Training loss: 1.6347... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 834... Training loss: 1.6155... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 835... Training loss: 1.5048... 0.1202 sec/batch\n", "Epoch: 2/20... Training Step: 836... Training loss: 1.5085... 0.1191 sec/batch\n", "Epoch: 2/20... Training Step: 837... Training loss: 1.6117... 0.1245 sec/batch\n", "Epoch: 2/20... Training Step: 838... Training loss: 1.7322... 0.1242 sec/batch\n", "Epoch: 2/20... Training Step: 839... Training loss: 1.5658... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 840... Training loss: 1.5773... 0.1226 sec/batch\n", "Epoch: 2/20... Training Step: 841... Training loss: 1.6765... 0.1220 sec/batch\n", "Epoch: 2/20... Training Step: 842... Training loss: 1.6058... 0.1201 sec/batch\n", "Epoch: 2/20... Training Step: 843... Training loss: 1.4991... 0.1241 sec/batch\n", "Epoch: 2/20... Training Step: 844... Training loss: 1.8051... 0.1201 sec/batch\n", "Epoch: 2/20... Training Step: 845... Training loss: 1.5494... 0.1167 sec/batch\n", "Epoch: 2/20... Training Step: 846... Training loss: 1.4711... 0.1233 sec/batch\n", "Epoch: 2/20... Training Step: 847... Training loss: 1.4854... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 848... Training loss: 1.7007... 0.1216 sec/batch\n", "Epoch: 2/20... Training Step: 849... Training loss: 1.4295... 0.1249 sec/batch\n", "Epoch: 2/20... Training Step: 850... Training loss: 1.6863... 0.1221 sec/batch\n", "Epoch: 2/20... Training Step: 851... Training loss: 1.6460... 0.1252 sec/batch\n", "Epoch: 2/20... Training Step: 852... Training loss: 1.4551... 0.1169 sec/batch\n", "Epoch: 2/20... Training Step: 853... Training loss: 1.6202... 0.1222 sec/batch\n", "Epoch: 2/20... Training Step: 854... Training loss: 1.6456... 
0.1245 sec/batch\n", "Epoch: 2/20... Training Step: 855... Training loss: 1.4674... 0.1238 sec/batch\n", "Epoch: 2/20... Training Step: 856... Training loss: 1.4540... 0.1181 sec/batch\n", "Epoch: 2/20... Training Step: 857... Training loss: 1.4891... 0.1229 sec/batch\n", "Epoch: 2/20... Training Step: 858... Training loss: 1.6085... 0.1213 sec/batch\n", "Epoch: 2/20... Training Step: 859... Training loss: 1.3830... 0.1222 sec/batch\n", "Epoch: 2/20... Training Step: 860... Training loss: 1.6827... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 861... Training loss: 1.3171... 0.1231 sec/batch\n", "Epoch: 2/20... Training Step: 862... Training loss: 1.3113... 0.1263 sec/batch\n", "Epoch: 2/20... Training Step: 863... Training loss: 1.5239... 0.1317 sec/batch\n", "Epoch: 2/20... Training Step: 864... Training loss: 1.3605... 0.1394 sec/batch\n", "Epoch: 2/20... Training Step: 865... Training loss: 1.6535... 0.1397 sec/batch\n", "Epoch: 2/20... Training Step: 866... Training loss: 1.5430... 0.1356 sec/batch\n", "Epoch: 2/20... Training Step: 867... Training loss: 1.4408... 0.1318 sec/batch\n", "Epoch: 2/20... Training Step: 868... Training loss: 1.5662... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 869... Training loss: 1.4922... 0.1241 sec/batch\n", "Epoch: 2/20... Training Step: 870... Training loss: 1.7384... 0.1287 sec/batch\n", "Epoch: 2/20... Training Step: 871... Training loss: 1.5672... 0.1232 sec/batch\n", "Epoch: 2/20... Training Step: 872... Training loss: 1.8103... 0.1210 sec/batch\n", "Epoch: 2/20... Training Step: 873... Training loss: 1.5248... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 874... Training loss: 1.5134... 0.1195 sec/batch\n", "Epoch: 2/20... Training Step: 875... Training loss: 1.4664... 0.1216 sec/batch\n", "Epoch: 2/20... Training Step: 876... Training loss: 1.4864... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 877... Training loss: 1.6520... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 878... 
Training loss: 1.5669... 0.1227 sec/batch\n", "Epoch: 2/20... Training Step: 879... Training loss: 1.8312... 0.1279 sec/batch\n", "Epoch: 2/20... Training Step: 880... Training loss: 1.7645... 0.1224 sec/batch\n", "Epoch: 2/20... Training Step: 881... Training loss: 1.7802... 0.1231 sec/batch\n", "Epoch: 2/20... Training Step: 882... Training loss: 1.4438... 0.1211 sec/batch\n", "Epoch: 2/20... Training Step: 883... Training loss: 1.5683... 0.1246 sec/batch\n", "Epoch: 2/20... Training Step: 884... Training loss: 1.3227... 0.1179 sec/batch\n", "Epoch: 2/20... Training Step: 885... Training loss: 1.6070... 0.1201 sec/batch\n", "Epoch: 2/20... Training Step: 886... Training loss: 1.5553... 0.1225 sec/batch\n", "Epoch: 2/20... Training Step: 887... Training loss: 1.6264... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 888... Training loss: 1.7644... 0.1215 sec/batch\n", "Epoch: 2/20... Training Step: 889... Training loss: 1.4650... 0.1268 sec/batch\n", "Epoch: 2/20... Training Step: 890... Training loss: 1.6550... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 891... Training loss: 1.5634... 0.1265 sec/batch\n", "Epoch: 2/20... Training Step: 892... Training loss: 1.8245... 0.1254 sec/batch\n", "Epoch: 2/20... Training Step: 893... Training loss: 1.4900... 0.1210 sec/batch\n", "Epoch: 2/20... Training Step: 894... Training loss: 1.5772... 0.1218 sec/batch\n", "Epoch: 2/20... Training Step: 895... Training loss: 1.8296... 0.1242 sec/batch\n", "Epoch: 2/20... Training Step: 896... Training loss: 1.5620... 0.1300 sec/batch\n", "Epoch: 2/20... Training Step: 897... Training loss: 1.8698... 0.1343 sec/batch\n", "Epoch: 2/20... Training Step: 898... Training loss: 1.6986... 0.1357 sec/batch\n", "Epoch: 2/20... Training Step: 899... Training loss: 1.4239... 0.1191 sec/batch\n", "Epoch: 2/20... Training Step: 900... Training loss: 1.6140... 0.1219 sec/batch\n", "Epoch: 2/20... Training Step: 901... Training loss: 1.5297... 0.1268 sec/batch\n", "Epoch: 2/20... 
Training Step: 902... Training loss: 1.7523... 0.1198 sec/batch\n", "Epoch: 2/20... Training Step: 903... Training loss: 1.8285... 0.1243 sec/batch\n", "Epoch: 2/20... Training Step: 904... Training loss: 1.7777... 0.1244 sec/batch\n", "Epoch: 2/20... Training Step: 905... Training loss: 1.6053... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 906... Training loss: 1.4899... 0.1242 sec/batch\n", "Epoch: 2/20... Training Step: 907... Training loss: 1.7333... 0.1223 sec/batch\n", "Epoch: 2/20... Training Step: 908... Training loss: 1.5857... 0.1244 sec/batch\n", "Epoch: 2/20... Training Step: 909... Training loss: 1.6375... 0.1235 sec/batch\n", "Epoch: 2/20... Training Step: 910... Training loss: 1.5512... 0.1222 sec/batch\n", "Epoch: 2/20... Training Step: 911... Training loss: 1.6102... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 912... Training loss: 1.5020... 0.1223 sec/batch\n", "Epoch: 2/20... Training Step: 913... Training loss: 1.4942... 0.1268 sec/batch\n", "Epoch: 2/20... Training Step: 914... Training loss: 1.5327... 0.1223 sec/batch\n", "Epoch: 2/20... Training Step: 915... Training loss: 1.5921... 0.1206 sec/batch\n", "Epoch: 2/20... Training Step: 916... Training loss: 1.6016... 0.1236 sec/batch\n", "Epoch: 2/20... Training Step: 917... Training loss: 1.5069... 0.1227 sec/batch\n", "Epoch: 2/20... Training Step: 918... Training loss: 1.8236... 0.1219 sec/batch\n", "Epoch: 2/20... Training Step: 919... Training loss: 1.7293... 0.1249 sec/batch\n", "Epoch: 2/20... Training Step: 920... Training loss: 1.5183... 0.1228 sec/batch\n", "Epoch: 2/20... Training Step: 921... Training loss: 1.4369... 0.1184 sec/batch\n", "Epoch: 2/20... Training Step: 922... Training loss: 1.4567... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 923... Training loss: 1.4554... 0.1246 sec/batch\n", "Epoch: 2/20... Training Step: 924... Training loss: 1.5544... 0.1297 sec/batch\n", "Epoch: 2/20... Training Step: 925... Training loss: 1.7085... 
0.1255 sec/batch\n", "Epoch: 2/20... Training Step: 926... Training loss: 1.5437... 0.1240 sec/batch\n", "Epoch: 2/20... Training Step: 927... Training loss: 1.6608... 0.1226 sec/batch\n", "Epoch: 2/20... Training Step: 928... Training loss: 1.3968... 0.1187 sec/batch\n", "Epoch: 3/20... Training Step: 929... Training loss: 1.9449... 0.1186 sec/batch\n", "Epoch: 3/20... Training Step: 930... Training loss: 1.5780... 0.1178 sec/batch\n", "Epoch: 3/20... Training Step: 931... Training loss: 1.4143... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 932... Training loss: 1.4950... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 933... Training loss: 1.6531... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 934... Training loss: 1.3630... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 935... Training loss: 1.7588... 0.1272 sec/batch\n", "Epoch: 3/20... Training Step: 936... Training loss: 1.4835... 0.1206 sec/batch\n", "Epoch: 3/20... Training Step: 937... Training loss: 1.4910... 0.1202 sec/batch\n", "Epoch: 3/20... Training Step: 938... Training loss: 1.7330... 0.1177 sec/batch\n", "Epoch: 3/20... Training Step: 939... Training loss: 1.4915... 0.1176 sec/batch\n", "Epoch: 3/20... Training Step: 940... Training loss: 1.2877... 0.1227 sec/batch\n", "Epoch: 3/20... Training Step: 941... Training loss: 1.7049... 0.1183 sec/batch\n", "Epoch: 3/20... Training Step: 942... Training loss: 1.3454... 0.1214 sec/batch\n", "Epoch: 3/20... Training Step: 943... Training loss: 1.5023... 0.1248 sec/batch\n", "Epoch: 3/20... Training Step: 944... Training loss: 1.5896... 0.1223 sec/batch\n", "Epoch: 3/20... Training Step: 945... Training loss: 1.3567... 0.1321 sec/batch\n", "Epoch: 3/20... Training Step: 946... Training loss: 1.3792... 0.1288 sec/batch\n", "Epoch: 3/20... Training Step: 947... Training loss: 1.4189... 0.1255 sec/batch\n", "Epoch: 3/20... Training Step: 948... Training loss: 1.3969... 0.1185 sec/batch\n", "Epoch: 3/20... Training Step: 949... 
Training loss: 1.6710... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 950... Training loss: 1.4185... 0.1224 sec/batch\n", "Epoch: 3/20... Training Step: 951... Training loss: 1.6852... 0.1223 sec/batch\n", "Epoch: 3/20... Training Step: 952... Training loss: 1.3690... 0.1263 sec/batch\n", "Epoch: 3/20... Training Step: 953... Training loss: 1.4494... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 954... Training loss: 1.5974... 0.1224 sec/batch\n", "Epoch: 3/20... Training Step: 955... Training loss: 1.5788... 0.1268 sec/batch\n", "Epoch: 3/20... Training Step: 956... Training loss: 1.3920... 0.1221 sec/batch\n", "Epoch: 3/20... Training Step: 957... Training loss: 1.5513... 0.1200 sec/batch\n", "Epoch: 3/20... Training Step: 958... Training loss: 1.4653... 0.1218 sec/batch\n", "Epoch: 3/20... Training Step: 959... Training loss: 1.3200... 0.1240 sec/batch\n", "Epoch: 3/20... Training Step: 960... Training loss: 1.4287... 0.1249 sec/batch\n", "Epoch: 3/20... Training Step: 961... Training loss: 1.3820... 0.1249 sec/batch\n", "Epoch: 3/20... Training Step: 962... Training loss: 1.3546... 0.1176 sec/batch\n", "Epoch: 3/20... Training Step: 963... Training loss: 1.3718... 0.1220 sec/batch\n", "Epoch: 3/20... Training Step: 964... Training loss: 1.4093... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 965... Training loss: 1.5570... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 966... Training loss: 1.3754... 0.1236 sec/batch\n", "Epoch: 3/20... Training Step: 967... Training loss: 1.3773... 0.1185 sec/batch\n", "Epoch: 3/20... Training Step: 968... Training loss: 1.6443... 0.1251 sec/batch\n", "Epoch: 3/20... Training Step: 969... Training loss: 1.3945... 0.1199 sec/batch\n", "Epoch: 3/20... Training Step: 970... Training loss: 1.3672... 0.1200 sec/batch\n", "Epoch: 3/20... Training Step: 971... Training loss: 1.5994... 0.1226 sec/batch\n", "Epoch: 3/20... Training Step: 972... Training loss: 1.3363... 0.1240 sec/batch\n", "Epoch: 3/20... 
Training Step: 973... Training loss: 1.4538... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 974... Training loss: 1.3994... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 975... Training loss: 1.3655... 0.1199 sec/batch\n", "Epoch: 3/20... Training Step: 976... Training loss: 1.5634... 0.1207 sec/batch\n", "Epoch: 3/20... Training Step: 977... Training loss: 1.4231... 0.1213 sec/batch\n", "Epoch: 3/20... Training Step: 978... Training loss: 1.7701... 0.1199 sec/batch\n", "Epoch: 3/20... Training Step: 979... Training loss: 1.4485... 0.1221 sec/batch\n", "Epoch: 3/20... Training Step: 980... Training loss: 1.5609... 0.1254 sec/batch\n", "Epoch: 3/20... Training Step: 981... Training loss: 1.7103... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 982... Training loss: 1.5043... 0.1146 sec/batch\n", "Epoch: 3/20... Training Step: 983... Training loss: 1.2968... 0.1188 sec/batch\n", "Epoch: 3/20... Training Step: 984... Training loss: 1.4487... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 985... Training loss: 1.7074... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 986... Training loss: 1.6142... 0.1224 sec/batch\n", "Epoch: 3/20... Training Step: 987... Training loss: 1.3614... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 988... Training loss: 1.4105... 0.1208 sec/batch\n", "Epoch: 3/20... Training Step: 989... Training loss: 1.5193... 0.1241 sec/batch\n", "Epoch: 3/20... Training Step: 990... Training loss: 1.5993... 0.1178 sec/batch\n", "Epoch: 3/20... Training Step: 991... Training loss: 1.3827... 0.1199 sec/batch\n", "Epoch: 3/20... Training Step: 992... Training loss: 1.4665... 0.1265 sec/batch\n", "Epoch: 3/20... Training Step: 993... Training loss: 1.3230... 0.1242 sec/batch\n", "Epoch: 3/20... Training Step: 994... Training loss: 1.6459... 0.1214 sec/batch\n", "Epoch: 3/20... Training Step: 995... Training loss: 1.4463... 0.1243 sec/batch\n", "Epoch: 3/20... Training Step: 996... Training loss: 1.6212... 
0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 997... Training loss: 1.3989... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 998... Training loss: 1.4242... 0.1238 sec/batch\n", "Epoch: 3/20... Training Step: 999... Training loss: 1.5636... 0.1213 sec/batch\n", "Epoch: 3/20... Training Step: 1000... Training loss: 1.5529... 0.1234 sec/batch\n", "Epoch: 3/20... Training Step: 1001... Training loss: 1.5257... 0.1177 sec/batch\n", "Epoch: 3/20... Training Step: 1002... Training loss: 1.4005... 0.1252 sec/batch\n", "Epoch: 3/20... Training Step: 1003... Training loss: 1.7731... 0.1245 sec/batch\n", "Epoch: 3/20... Training Step: 1004... Training loss: 1.4202... 0.1186 sec/batch\n", "Epoch: 3/20... Training Step: 1005... Training loss: 1.3353... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1006... Training loss: 1.4132... 0.1213 sec/batch\n", "Epoch: 3/20... Training Step: 1007... Training loss: 1.4927... 0.1216 sec/batch\n", "Epoch: 3/20... Training Step: 1008... Training loss: 1.3253... 0.1216 sec/batch\n", "Epoch: 3/20... Training Step: 1009... Training loss: 1.5526... 0.1250 sec/batch\n", "Epoch: 3/20... Training Step: 1010... Training loss: 1.4232... 0.1196 sec/batch\n", "Epoch: 3/20... Training Step: 1011... Training loss: 1.3292... 0.1196 sec/batch\n", "Epoch: 3/20... Training Step: 1012... Training loss: 1.5740... 0.1256 sec/batch\n", "Epoch: 3/20... Training Step: 1013... Training loss: 1.4884... 0.1178 sec/batch\n", "Epoch: 3/20... Training Step: 1014... Training loss: 1.5495... 0.1208 sec/batch\n", "Epoch: 3/20... Training Step: 1015... Training loss: 1.3761... 0.1253 sec/batch\n", "Epoch: 3/20... Training Step: 1016... Training loss: 1.5496... 0.1255 sec/batch\n", "Epoch: 3/20... Training Step: 1017... Training loss: 1.7168... 0.1249 sec/batch\n", "Epoch: 3/20... Training Step: 1018... Training loss: 1.4293... 0.1175 sec/batch\n", "Epoch: 3/20... Training Step: 1019... Training loss: 1.5856... 0.1164 sec/batch\n", "Epoch: 3/20... 
Training Step: 1020... Training loss: 1.8311... 0.1222 sec/batch\n", "Epoch: 3/20... Training Step: 1021... Training loss: 1.3409... 0.1226 sec/batch\n", "Epoch: 3/20... Training Step: 1022... Training loss: 1.8408... 0.1253 sec/batch\n", "Epoch: 3/20... Training Step: 1023... Training loss: 1.4086... 0.1221 sec/batch\n", "Epoch: 3/20... Training Step: 1024... Training loss: 1.5127... 0.1212 sec/batch\n", "Epoch: 3/20... Training Step: 1025... Training loss: 1.9485... 0.1273 sec/batch\n", "Epoch: 3/20... Training Step: 1026... Training loss: 1.6082... 0.1271 sec/batch\n", "Epoch: 3/20... Training Step: 1027... Training loss: 1.5680... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1028... Training loss: 1.4651... 0.1236 sec/batch\n", "Epoch: 3/20... Training Step: 1029... Training loss: 1.4863... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1030... Training loss: 1.8156... 0.1183 sec/batch\n", "Epoch: 3/20... Training Step: 1031... Training loss: 1.8167... 0.1229 sec/batch\n", "Epoch: 3/20... Training Step: 1032... Training loss: 1.5805... 0.1231 sec/batch\n", "Epoch: 3/20... Training Step: 1033... Training loss: 1.6752... 0.1198 sec/batch\n", "Epoch: 3/20... Training Step: 1034... Training loss: 1.8935... 0.1246 sec/batch\n", "Epoch: 3/20... Training Step: 1035... Training loss: 1.7469... 0.1196 sec/batch\n", "Epoch: 3/20... Training Step: 1036... Training loss: 1.8368... 0.1240 sec/batch\n", "Epoch: 3/20... Training Step: 1037... Training loss: 1.8881... 0.1188 sec/batch\n", "Epoch: 3/20... Training Step: 1038... Training loss: 1.5683... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1039... Training loss: 1.6711... 0.1254 sec/batch\n", "Epoch: 3/20... Training Step: 1040... Training loss: 1.6341... 0.1196 sec/batch\n", "Epoch: 3/20... Training Step: 1041... Training loss: 1.5969... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1042... Training loss: 1.8003... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 1043... 
Training loss: 1.6066... 0.1214 sec/batch\n", "Epoch: 3/20... Training Step: 1044... Training loss: 1.4393... 0.1198 sec/batch\n", "Epoch: 3/20... Training Step: 1045... Training loss: 1.7296... 0.1231 sec/batch\n", "Epoch: 3/20... Training Step: 1046... Training loss: 1.7490... 0.1211 sec/batch\n", "Epoch: 3/20... Training Step: 1047... Training loss: 1.6345... 0.1210 sec/batch\n", "Epoch: 3/20... Training Step: 1048... Training loss: 1.4011... 0.1202 sec/batch\n", "Epoch: 3/20... Training Step: 1049... Training loss: 1.5176... 0.1219 sec/batch\n", "Epoch: 3/20... Training Step: 1050... Training loss: 1.6017... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1051... Training loss: 1.7129... 0.1179 sec/batch\n", "Epoch: 3/20... Training Step: 1052... Training loss: 1.6136... 0.1223 sec/batch\n", "Epoch: 3/20... Training Step: 1053... Training loss: 1.6677... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1054... Training loss: 1.3784... 0.1184 sec/batch\n", "Epoch: 3/20... Training Step: 1055... Training loss: 1.4385... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 1056... Training loss: 1.5424... 0.1260 sec/batch\n", "Epoch: 3/20... Training Step: 1057... Training loss: 1.6811... 0.1172 sec/batch\n", "Epoch: 3/20... Training Step: 1058... Training loss: 1.4956... 0.1194 sec/batch\n", "Epoch: 3/20... Training Step: 1059... Training loss: 1.8544... 0.1348 sec/batch\n", "Epoch: 3/20... Training Step: 1060... Training loss: 1.6321... 0.1359 sec/batch\n", "Epoch: 3/20... Training Step: 1061... Training loss: 1.5454... 0.1209 sec/batch\n", "Epoch: 3/20... Training Step: 1062... Training loss: 1.6454... 0.1221 sec/batch\n", "Epoch: 3/20... Training Step: 1063... Training loss: 1.4355... 0.1192 sec/batch\n", "Epoch: 3/20... Training Step: 1064... Training loss: 1.4099... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1065... Training loss: 1.3926... 0.1248 sec/batch\n", "Epoch: 3/20... Training Step: 1066... Training loss: 1.4718... 
0.1207 sec/batch\n", "Epoch: 3/20... Training Step: 1067... Training loss: 1.4711... 0.1203 sec/batch\n", "Epoch: 3/20... Training Step: 1068... Training loss: 1.5006... 0.1210 sec/batch\n", "Epoch: 3/20... Training Step: 1069... Training loss: 1.4499... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1070... Training loss: 1.4404... 0.1260 sec/batch\n", "Epoch: 3/20... Training Step: 1071... Training loss: 1.3352... 0.1286 sec/batch\n", "Epoch: 3/20... Training Step: 1072... Training loss: 1.4516... 0.1287 sec/batch\n", "Epoch: 3/20... Training Step: 1073... Training loss: 1.5586... 0.1205 sec/batch\n", "Epoch: 3/20... Training Step: 1074... Training loss: 1.4360... 0.1222 sec/batch\n", "Epoch: 3/20... Training Step: 1075... Training loss: 1.6583... 0.1214 sec/batch\n", "Epoch: 3/20... Training Step: 1076... Training loss: 1.4968... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 1077... Training loss: 1.5031... 0.1245 sec/batch\n", "Epoch: 3/20... Training Step: 1078... Training loss: 1.6054... 0.1214 sec/batch\n", "Epoch: 3/20... Training Step: 1079... Training loss: 1.6738... 0.1265 sec/batch\n", "Epoch: 3/20... Training Step: 1080... Training loss: 1.6745... 0.1237 sec/batch\n", "Epoch: 3/20... Training Step: 1081... Training loss: 1.7202... 0.1313 sec/batch\n", "Epoch: 3/20... Training Step: 1082... Training loss: 1.6778... 0.1264 sec/batch\n", "Epoch: 3/20... Training Step: 1083... Training loss: 1.6715... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1084... Training loss: 1.4047... 0.1207 sec/batch\n", "Epoch: 3/20... Training Step: 1085... Training loss: 1.4174... 0.1226 sec/batch\n", "Epoch: 3/20... Training Step: 1086... Training loss: 1.4059... 0.1210 sec/batch\n", "Epoch: 3/20... Training Step: 1087... Training loss: 1.4300... 0.1238 sec/batch\n", "Epoch: 3/20... Training Step: 1088... Training loss: 1.4531... 0.1197 sec/batch\n", "Epoch: 3/20... Training Step: 1089... Training loss: 1.6735... 0.1237 sec/batch\n", "Epoch: 3/20... 
Training Step: 1090... Training loss: 1.5071... 0.1212 sec/batch\n", "Epoch: 3/20... Training Step: 1091... Training loss: 1.6246... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 1092... Training loss: 1.2634... 0.1224 sec/batch\n", "Epoch: 3/20... Training Step: 1093... Training loss: 1.6045... 0.1222 sec/batch\n", "Epoch: 3/20... Training Step: 1094... Training loss: 1.5277... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1095... Training loss: 1.4848... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 1096... Training loss: 1.6212... 0.1231 sec/batch\n", "Epoch: 3/20... Training Step: 1097... Training loss: 1.6334... 0.1212 sec/batch\n", "Epoch: 3/20... Training Step: 1098... Training loss: 1.6819... 0.1189 sec/batch\n", "Epoch: 3/20... Training Step: 1099... Training loss: 1.5043... 0.1238 sec/batch\n", "Epoch: 3/20... Training Step: 1100... Training loss: 1.5513... 0.1239 sec/batch\n", "Epoch: 3/20... Training Step: 1101... Training loss: 1.5325... 0.1245 sec/batch\n", "Epoch: 3/20... Training Step: 1102... Training loss: 1.5957... 0.1265 sec/batch\n", "Epoch: 3/20... Training Step: 1103... Training loss: 1.6104... 0.1219 sec/batch\n", "Epoch: 3/20... Training Step: 1104... Training loss: 1.3920... 0.1203 sec/batch\n", "Epoch: 3/20... Training Step: 1105... Training loss: 1.2441... 0.1195 sec/batch\n", "Epoch: 3/20... Training Step: 1106... Training loss: 1.6908... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1107... Training loss: 1.3905... 0.1199 sec/batch\n", "Epoch: 3/20... Training Step: 1108... Training loss: 1.7121... 0.1222 sec/batch\n", "Epoch: 3/20... Training Step: 1109... Training loss: 1.4404... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 1110... Training loss: 1.7088... 0.1201 sec/batch\n", "Epoch: 3/20... Training Step: 1111... Training loss: 1.6019... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 1112... Training loss: 1.5274... 0.1190 sec/batch\n", "Epoch: 3/20... Training Step: 1113... 
Training loss: 1.8034... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1114... Training loss: 1.5510... 0.1219 sec/batch\n", "Epoch: 3/20... Training Step: 1115... Training loss: 1.7064... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 1116... Training loss: 1.4531... 0.1190 sec/batch\n", "Epoch: 3/20... Training Step: 1117... Training loss: 1.5547... 0.1258 sec/batch\n", "Epoch: 3/20... Training Step: 1118... Training loss: 1.6288... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 1119... Training loss: 1.3383... 0.1216 sec/batch\n", "Epoch: 3/20... Training Step: 1120... Training loss: 1.6835... 0.1210 sec/batch\n", "Epoch: 3/20... Training Step: 1121... Training loss: 1.5582... 0.1240 sec/batch\n", "Epoch: 3/20... Training Step: 1122... Training loss: 1.4711... 0.1240 sec/batch\n", "Epoch: 3/20... Training Step: 1123... Training loss: 1.5970... 0.1265 sec/batch\n", "Epoch: 3/20... Training Step: 1124... Training loss: 1.5486... 0.1213 sec/batch\n", "Epoch: 3/20... Training Step: 1125... Training loss: 1.3892... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1126... Training loss: 1.5995... 0.1194 sec/batch\n", "Epoch: 3/20... Training Step: 1127... Training loss: 1.3688... 0.1312 sec/batch\n", "Epoch: 3/20... Training Step: 1128... Training loss: 1.4986... 0.1248 sec/batch\n", "Epoch: 3/20... Training Step: 1129... Training loss: 1.3921... 0.1203 sec/batch\n", "Epoch: 3/20... Training Step: 1130... Training loss: 1.4580... 0.1205 sec/batch\n", "Epoch: 3/20... Training Step: 1131... Training loss: 1.5017... 0.1312 sec/batch\n", "Epoch: 3/20... Training Step: 1132... Training loss: 1.6963... 0.1323 sec/batch\n", "Epoch: 3/20... Training Step: 1133... Training loss: 1.3546... 0.1297 sec/batch\n", "Epoch: 3/20... Training Step: 1134... Training loss: 1.3314... 0.1327 sec/batch\n", "Epoch: 3/20... Training Step: 1135... Training loss: 1.5422... 0.1353 sec/batch\n", "Epoch: 3/20... Training Step: 1136... Training loss: 1.5293... 
0.1274 sec/batch\n", "Epoch: 3/20... Training Step: 1137... Training loss: 1.4793... 0.1262 sec/batch\n", "Epoch: 3/20... Training Step: 1138... Training loss: 1.4585... 0.1305 sec/batch\n", "Epoch: 3/20... Training Step: 1139... Training loss: 1.1157... 0.1414 sec/batch\n", "Epoch: 3/20... Training Step: 1140... Training loss: 1.5859... 0.1278 sec/batch\n", "Epoch: 3/20... Training Step: 1141... Training loss: 1.5781... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1142... Training loss: 1.4823... 0.1214 sec/batch\n", "Epoch: 3/20... Training Step: 1143... Training loss: 1.6744... 0.1209 sec/batch\n", "Epoch: 3/20... Training Step: 1144... Training loss: 1.4642... 0.1265 sec/batch\n", "Epoch: 3/20... Training Step: 1145... Training loss: 1.5313... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1146... Training loss: 1.3649... 0.1450 sec/batch\n", "Epoch: 3/20... Training Step: 1147... Training loss: 1.6506... 0.1408 sec/batch\n", "Epoch: 3/20... Training Step: 1148... Training loss: 1.4020... 0.1336 sec/batch\n", "Epoch: 3/20... Training Step: 1149... Training loss: 1.4229... 0.1321 sec/batch\n", "Epoch: 3/20... Training Step: 1150... Training loss: 1.6742... 0.1291 sec/batch\n", "Epoch: 3/20... Training Step: 1151... Training loss: 1.6882... 0.1212 sec/batch\n", "Epoch: 3/20... Training Step: 1152... Training loss: 1.6954... 0.1226 sec/batch\n", "Epoch: 3/20... Training Step: 1153... Training loss: 1.5207... 0.1207 sec/batch\n", "Epoch: 3/20... Training Step: 1154... Training loss: 1.6923... 0.1200 sec/batch\n", "Epoch: 3/20... Training Step: 1155... Training loss: 1.6228... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1156... Training loss: 1.3515... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1157... Training loss: 1.4203... 0.1255 sec/batch\n", "Epoch: 3/20... Training Step: 1158... Training loss: 1.4987... 0.1249 sec/batch\n", "Epoch: 3/20... Training Step: 1159... Training loss: 1.4366... 0.1234 sec/batch\n", "Epoch: 3/20... 
Training Step: 1160... Training loss: 1.3728... 0.1248 sec/batch\n", "Epoch: 3/20... Training Step: 1161... Training loss: 1.7861... 0.1224 sec/batch\n", "Epoch: 3/20... Training Step: 1162... Training loss: 1.3730... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1163... Training loss: 1.6585... 0.1241 sec/batch\n", "Epoch: 3/20... Training Step: 1164... Training loss: 1.3996... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1165... Training loss: 1.8117... 0.1224 sec/batch\n", "Epoch: 3/20... Training Step: 1166... Training loss: 1.3453... 0.1259 sec/batch\n", "Epoch: 3/20... Training Step: 1167... Training loss: 1.5191... 0.1248 sec/batch\n", "Epoch: 3/20... Training Step: 1168... Training loss: 1.5896... 0.1200 sec/batch\n", "Epoch: 3/20... Training Step: 1169... Training loss: 1.4248... 0.1253 sec/batch\n", "Epoch: 3/20... Training Step: 1170... Training loss: 1.4025... 0.1236 sec/batch\n", "Epoch: 3/20... Training Step: 1171... Training loss: 1.6216... 0.1211 sec/batch\n", "Epoch: 3/20... Training Step: 1172... Training loss: 1.5249... 0.1234 sec/batch\n", "Epoch: 3/20... Training Step: 1173... Training loss: 1.5229... 0.1256 sec/batch\n", "Epoch: 3/20... Training Step: 1174... Training loss: 1.3503... 0.1235 sec/batch\n", "Epoch: 3/20... Training Step: 1175... Training loss: 1.4559... 0.1283 sec/batch\n", "Epoch: 3/20... Training Step: 1176... Training loss: 1.5606... 0.1195 sec/batch\n", "Epoch: 3/20... Training Step: 1177... Training loss: 1.3951... 0.1220 sec/batch\n", "Epoch: 3/20... Training Step: 1178... Training loss: 1.4849... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1179... Training loss: 1.5060... 0.1229 sec/batch\n", "Epoch: 3/20... Training Step: 1180... Training loss: 1.4280... 0.1242 sec/batch\n", "Epoch: 3/20... Training Step: 1181... Training loss: 1.3011... 0.1213 sec/batch\n", "Epoch: 3/20... Training Step: 1182... Training loss: 1.5110... 0.1192 sec/batch\n", "Epoch: 3/20... Training Step: 1183... 
Training loss: 1.4393... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 1184... Training loss: 1.4552... 0.1194 sec/batch\n", "Epoch: 3/20... Training Step: 1185... Training loss: 1.7136... 0.1237 sec/batch\n", "Epoch: 3/20... Training Step: 1186... Training loss: 1.4040... 0.1202 sec/batch\n", "Epoch: 3/20... Training Step: 1187... Training loss: 1.4646... 0.1237 sec/batch\n", "Epoch: 3/20... Training Step: 1188... Training loss: 1.4631... 0.1198 sec/batch\n", "Epoch: 3/20... Training Step: 1189... Training loss: 1.4965... 0.1248 sec/batch\n", "Epoch: 3/20... Training Step: 1190... Training loss: 1.5032... 0.1190 sec/batch\n", "Epoch: 3/20... Training Step: 1191... Training loss: 1.5201... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1192... Training loss: 1.6394... 0.1240 sec/batch\n", "Epoch: 3/20... Training Step: 1193... Training loss: 1.5528... 0.1200 sec/batch\n", "Epoch: 3/20... Training Step: 1194... Training loss: 1.5468... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1195... Training loss: 1.8005... 0.1265 sec/batch\n", "Epoch: 3/20... Training Step: 1196... Training loss: 1.5517... 0.1237 sec/batch\n", "Epoch: 3/20... Training Step: 1197... Training loss: 1.5621... 0.1256 sec/batch\n", "Epoch: 3/20... Training Step: 1198... Training loss: 1.7631... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1199... Training loss: 1.5714... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1200... Training loss: 1.5347... 0.1218 sec/batch\n", "Epoch: 3/20... Training Step: 1201... Training loss: 1.6003... 0.1256 sec/batch\n", "Epoch: 3/20... Training Step: 1202... Training loss: 1.4169... 0.1171 sec/batch\n", "Epoch: 3/20... Training Step: 1203... Training loss: 1.6148... 0.1161 sec/batch\n", "Epoch: 3/20... Training Step: 1204... Training loss: 1.5746... 0.1197 sec/batch\n", "Epoch: 3/20... Training Step: 1205... Training loss: 1.6521... 0.1223 sec/batch\n", "Epoch: 3/20... Training Step: 1206... Training loss: 1.7894... 
0.1206 sec/batch\n", "Epoch: 3/20... Training Step: 1207... Training loss: 1.5322... 0.1197 sec/batch\n", "Epoch: 3/20... Training Step: 1208... Training loss: 1.4790... 0.1191 sec/batch\n", "Epoch: 3/20... Training Step: 1209... Training loss: 1.3678... 0.1205 sec/batch\n", "Epoch: 3/20... Training Step: 1210... Training loss: 1.3871... 0.1266 sec/batch\n", "Epoch: 3/20... Training Step: 1211... Training loss: 1.4056... 0.1211 sec/batch\n", "Epoch: 3/20... Training Step: 1212... Training loss: 1.5430... 0.1221 sec/batch\n", "Epoch: 3/20... Training Step: 1213... Training loss: 1.4124... 0.1227 sec/batch\n", "Epoch: 3/20... Training Step: 1214... Training loss: 1.5950... 0.1227 sec/batch\n", "Epoch: 3/20... Training Step: 1215... Training loss: 1.5371... 0.1209 sec/batch\n", "Epoch: 3/20... Training Step: 1216... Training loss: 1.5451... 0.1199 sec/batch\n", "Epoch: 3/20... Training Step: 1217... Training loss: 1.6628... 0.1254 sec/batch\n", "Epoch: 3/20... Training Step: 1218... Training loss: 1.4933... 0.1308 sec/batch\n", "Epoch: 3/20... Training Step: 1219... Training loss: 1.4938... 0.1243 sec/batch\n", "Epoch: 3/20... Training Step: 1220... Training loss: 1.4262... 0.1183 sec/batch\n", "Epoch: 3/20... Training Step: 1221... Training loss: 1.3884... 0.1186 sec/batch\n", "Epoch: 3/20... Training Step: 1222... Training loss: 1.4537... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 1223... Training loss: 1.5068... 0.1255 sec/batch\n", "Epoch: 3/20... Training Step: 1224... Training loss: 1.6773... 0.1205 sec/batch\n", "Epoch: 3/20... Training Step: 1225... Training loss: 1.3959... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1226... Training loss: 1.4378... 0.1213 sec/batch\n", "Epoch: 3/20... Training Step: 1227... Training loss: 1.4300... 0.1211 sec/batch\n", "Epoch: 3/20... Training Step: 1228... Training loss: 1.5322... 0.1208 sec/batch\n", "Epoch: 3/20... Training Step: 1229... Training loss: 1.4586... 0.1243 sec/batch\n", "Epoch: 3/20... 
Training Step: 1230... Training loss: 1.5052... 0.1236 sec/batch\n", "Epoch: 3/20... Training Step: 1231... Training loss: 1.2914... 0.1227 sec/batch\n", "Epoch: 3/20... Training Step: 1232... Training loss: 1.7427... 0.1197 sec/batch\n", "Epoch: 3/20... Training Step: 1233... Training loss: 1.3229... 0.1235 sec/batch\n", "Epoch: 3/20... Training Step: 1234... Training loss: 1.5045... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 1235... Training loss: 1.4541... 0.1210 sec/batch\n", "Epoch: 3/20... Training Step: 1236... Training loss: 1.8124... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1237... Training loss: 1.5099... 0.1206 sec/batch\n", "Epoch: 3/20... Training Step: 1238... Training loss: 1.6020... 0.1218 sec/batch\n", "Epoch: 3/20... Training Step: 1239... Training loss: 1.5292... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1240... Training loss: 1.5018... 0.1231 sec/batch\n", "Epoch: 3/20... Training Step: 1241... Training loss: 1.6346... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1242... Training loss: 1.4792... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 1243... Training loss: 1.2944... 0.1229 sec/batch\n", "Epoch: 3/20... Training Step: 1244... Training loss: 1.4494... 0.1199 sec/batch\n", "Epoch: 3/20... Training Step: 1245... Training loss: 1.4722... 0.1237 sec/batch\n", "Epoch: 3/20... Training Step: 1246... Training loss: 1.4190... 0.1211 sec/batch\n", "Epoch: 3/20... Training Step: 1247... Training loss: 1.4602... 0.1234 sec/batch\n", "Epoch: 3/20... Training Step: 1248... Training loss: 1.3172... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 1249... Training loss: 1.2813... 0.1188 sec/batch\n", "Epoch: 3/20... Training Step: 1250... Training loss: 1.4847... 0.1252 sec/batch\n", "Epoch: 3/20... Training Step: 1251... Training loss: 1.3949... 0.1244 sec/batch\n", "Epoch: 3/20... Training Step: 1252... Training loss: 1.3037... 0.1237 sec/batch\n", "Epoch: 3/20... Training Step: 1253... 
Training loss: 1.2033... 0.1338 sec/batch\n", "Epoch: 3/20... Training Step: 1254... Training loss: 1.3316... 0.1363 sec/batch\n", "Epoch: 3/20... Training Step: 1255... Training loss: 1.5470... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 1256... Training loss: 1.4848... 0.1190 sec/batch\n", "Epoch: 3/20... Training Step: 1257... Training loss: 1.4451... 0.1195 sec/batch\n", "Epoch: 3/20... Training Step: 1258... Training loss: 1.5039... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1259... Training loss: 1.5019... 0.1223 sec/batch\n", "Epoch: 3/20... Training Step: 1260... Training loss: 1.4659... 0.1216 sec/batch\n", "Epoch: 3/20... Training Step: 1261... Training loss: 1.2788... 0.1192 sec/batch\n", "Epoch: 3/20... Training Step: 1262... Training loss: 1.3967... 0.1267 sec/batch\n", "Epoch: 3/20... Training Step: 1263... Training loss: 1.6080... 0.1298 sec/batch\n", "Epoch: 3/20... Training Step: 1264... Training loss: 1.4442... 0.1268 sec/batch\n", "Epoch: 3/20... Training Step: 1265... Training loss: 1.4486... 0.1297 sec/batch\n", "Epoch: 3/20... Training Step: 1266... Training loss: 1.4580... 0.1258 sec/batch\n", "Epoch: 3/20... Training Step: 1267... Training loss: 1.5623... 0.1245 sec/batch\n", "Epoch: 3/20... Training Step: 1268... Training loss: 1.3603... 0.1206 sec/batch\n", "Epoch: 3/20... Training Step: 1269... Training loss: 1.3881... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 1270... Training loss: 1.5881... 0.1202 sec/batch\n", "Epoch: 3/20... Training Step: 1271... Training loss: 1.1880... 0.1177 sec/batch\n", "Epoch: 3/20... Training Step: 1272... Training loss: 1.6003... 0.1224 sec/batch\n", "Epoch: 3/20... Training Step: 1273... Training loss: 1.3740... 0.1229 sec/batch\n", "Epoch: 3/20... Training Step: 1274... Training loss: 1.3288... 0.1190 sec/batch\n", "Epoch: 3/20... Training Step: 1275... Training loss: 1.2824... 0.1275 sec/batch\n", "Epoch: 3/20... Training Step: 1276... Training loss: 1.7658... 
0.1248 sec/batch\n", "Epoch: 3/20... Training Step: 1277... Training loss: 1.2926... 0.1230 sec/batch\n", "Epoch: 3/20... Training Step: 1278... Training loss: 1.4131... 0.1203 sec/batch\n", "Epoch: 3/20... Training Step: 1279... Training loss: 1.4284... 0.1236 sec/batch\n", "Epoch: 3/20... Training Step: 1280... Training loss: 1.4369... 0.1240 sec/batch\n", "Epoch: 3/20... Training Step: 1281... Training loss: 1.2484... 0.1205 sec/batch\n", "Epoch: 3/20... Training Step: 1282... Training loss: 1.1394... 0.1212 sec/batch\n", "Epoch: 3/20... Training Step: 1283... Training loss: 1.5956... 0.1231 sec/batch\n", "Epoch: 3/20... Training Step: 1284... Training loss: 1.3623... 0.1231 sec/batch\n", "Epoch: 3/20... Training Step: 1285... Training loss: 1.3414... 0.1168 sec/batch\n", "Epoch: 3/20... Training Step: 1286... Training loss: 1.4809... 0.1236 sec/batch\n", "Epoch: 3/20... Training Step: 1287... Training loss: 1.6673... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 1288... Training loss: 1.1649... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 1289... Training loss: 1.4957... 0.1265 sec/batch\n", "Epoch: 3/20... Training Step: 1290... Training loss: 1.4759... 0.1211 sec/batch\n", "Epoch: 3/20... Training Step: 1291... Training loss: 1.4942... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 1292... Training loss: 1.5007... 0.1234 sec/batch\n", "Epoch: 3/20... Training Step: 1293... Training loss: 1.3933... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1294... Training loss: 1.4123... 0.1174 sec/batch\n", "Epoch: 3/20... Training Step: 1295... Training loss: 1.2318... 0.1189 sec/batch\n", "Epoch: 3/20... Training Step: 1296... Training loss: 1.5183... 0.1207 sec/batch\n", "Epoch: 3/20... Training Step: 1297... Training loss: 1.4462... 0.1256 sec/batch\n", "Epoch: 3/20... Training Step: 1298... Training loss: 1.3265... 0.1228 sec/batch\n", "Epoch: 3/20... Training Step: 1299... Training loss: 1.2633... 0.1228 sec/batch\n", "Epoch: 3/20... 
Training Step: 1300... Training loss: 1.5061... 0.1218 sec/batch\n", "Epoch: 3/20... Training Step: 1301... Training loss: 1.4581... 0.1223 sec/batch\n", "Epoch: 3/20... Training Step: 1302... Training loss: 1.5485... 0.1216 sec/batch\n", "Epoch: 3/20... Training Step: 1303... Training loss: 1.4608... 0.1212 sec/batch\n", "Epoch: 3/20... Training Step: 1304... Training loss: 1.3954... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 1305... Training loss: 1.5705... 0.1261 sec/batch\n", "Epoch: 3/20... Training Step: 1306... Training loss: 1.4479... 0.1320 sec/batch\n", "Epoch: 3/20... Training Step: 1307... Training loss: 1.4015... 0.1288 sec/batch\n", "Epoch: 3/20... Training Step: 1308... Training loss: 1.6474... 0.1200 sec/batch\n", "Epoch: 3/20... Training Step: 1309... Training loss: 1.2750... 0.1179 sec/batch\n", "Epoch: 3/20... Training Step: 1310... Training loss: 1.3717... 0.1206 sec/batch\n", "Epoch: 3/20... Training Step: 1311... Training loss: 1.3306... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 1312... Training loss: 1.4124... 0.1149 sec/batch\n", "Epoch: 3/20... Training Step: 1313... Training loss: 1.3034... 0.1221 sec/batch\n", "Epoch: 3/20... Training Step: 1314... Training loss: 1.4703... 0.1244 sec/batch\n", "Epoch: 3/20... Training Step: 1315... Training loss: 1.4667... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1316... Training loss: 1.2582... 0.1288 sec/batch\n", "Epoch: 3/20... Training Step: 1317... Training loss: 1.3367... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 1318... Training loss: 1.3607... 0.1178 sec/batch\n", "Epoch: 3/20... Training Step: 1319... Training loss: 1.2614... 0.1231 sec/batch\n", "Epoch: 3/20... Training Step: 1320... Training loss: 1.2499... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1321... Training loss: 1.3843... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 1322... Training loss: 1.4731... 0.1237 sec/batch\n", "Epoch: 3/20... Training Step: 1323... 
Training loss: 1.3160... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 1324... Training loss: 1.5521... 0.1229 sec/batch\n", "Epoch: 3/20... Training Step: 1325... Training loss: 1.2794... 0.1243 sec/batch\n", "Epoch: 3/20... Training Step: 1326... Training loss: 1.2844... 0.1232 sec/batch\n", "Epoch: 3/20... Training Step: 1327... Training loss: 1.3642... 0.1236 sec/batch\n", "Epoch: 3/20... Training Step: 1328... Training loss: 1.3242... 0.1219 sec/batch\n", "Epoch: 3/20... Training Step: 1329... Training loss: 1.4141... 0.1227 sec/batch\n", "Epoch: 3/20... Training Step: 1330... Training loss: 1.3989... 0.1258 sec/batch\n", "Epoch: 3/20... Training Step: 1331... Training loss: 1.3728... 0.1221 sec/batch\n", "Epoch: 3/20... Training Step: 1332... Training loss: 1.4453... 0.1225 sec/batch\n", "Epoch: 3/20... Training Step: 1333... Training loss: 1.4572... 0.1202 sec/batch\n", "Epoch: 3/20... Training Step: 1334... Training loss: 1.6580... 0.1209 sec/batch\n", "Epoch: 3/20... Training Step: 1335... Training loss: 1.4955... 0.1209 sec/batch\n", "Epoch: 3/20... Training Step: 1336... Training loss: 1.7907... 0.1191 sec/batch\n", "Epoch: 3/20... Training Step: 1337... Training loss: 1.4268... 0.1239 sec/batch\n", "Epoch: 3/20... Training Step: 1338... Training loss: 1.3740... 0.1220 sec/batch\n", "Epoch: 3/20... Training Step: 1339... Training loss: 1.2539... 0.1243 sec/batch\n", "Epoch: 3/20... Training Step: 1340... Training loss: 1.4921... 0.1291 sec/batch\n", "Epoch: 3/20... Training Step: 1341... Training loss: 1.4860... 0.1239 sec/batch\n", "Epoch: 3/20... Training Step: 1342... Training loss: 1.4720... 0.1208 sec/batch\n", "Epoch: 3/20... Training Step: 1343... Training loss: 1.6715... 0.1269 sec/batch\n", "Epoch: 3/20... Training Step: 1344... Training loss: 1.7732... 0.1307 sec/batch\n", "Epoch: 3/20... Training Step: 1345... Training loss: 1.5820... 0.1327 sec/batch\n", "Epoch: 3/20... Training Step: 1346... Training loss: 1.3372... 
0.1170 sec/batch\n", "Epoch: 3/20... Training Step: 1347... Training loss: 1.6558... 0.1314 sec/batch\n", "Epoch: 3/20... Training Step: 1348... Training loss: 1.3329... 0.1218 sec/batch\n", "Epoch: 3/20... Training Step: 1349... Training loss: 1.5555... 0.1188 sec/batch\n", "Epoch: 3/20... Training Step: 1350... Training loss: 1.5288... 0.1314 sec/batch\n", "Epoch: 3/20... Training Step: 1351... Training loss: 1.5522... 0.1235 sec/batch\n", "Epoch: 3/20... Training Step: 1352... Training loss: 1.5946... 0.1249 sec/batch\n", "Epoch: 3/20... Training Step: 1353... Training loss: 1.3923... 0.1265 sec/batch\n", "Epoch: 3/20... Training Step: 1354... Training loss: 1.5529... 0.1299 sec/batch\n", "Epoch: 3/20... Training Step: 1355... Training loss: 1.5271... 0.1359 sec/batch\n", "Epoch: 3/20... Training Step: 1356... Training loss: 1.5991... 0.1324 sec/batch\n", "Epoch: 3/20... Training Step: 1357... Training loss: 1.4104... 0.1308 sec/batch\n", "Epoch: 3/20... Training Step: 1358... Training loss: 1.4555... 0.1285 sec/batch\n", "Epoch: 3/20... Training Step: 1359... Training loss: 1.6443... 0.1335 sec/batch\n", "Epoch: 3/20... Training Step: 1360... Training loss: 1.3999... 0.1206 sec/batch\n", "Epoch: 3/20... Training Step: 1361... Training loss: 1.7414... 0.1229 sec/batch\n", "Epoch: 3/20... Training Step: 1362... Training loss: 1.4795... 0.1209 sec/batch\n", "Epoch: 3/20... Training Step: 1363... Training loss: 1.3346... 0.1194 sec/batch\n", "Epoch: 3/20... Training Step: 1364... Training loss: 1.5552... 0.1215 sec/batch\n", "Epoch: 3/20... Training Step: 1365... Training loss: 1.3470... 0.1204 sec/batch\n", "Epoch: 3/20... Training Step: 1366... Training loss: 1.6726... 0.1196 sec/batch\n", "Epoch: 3/20... Training Step: 1367... Training loss: 1.7311... 0.1206 sec/batch\n", "Epoch: 3/20... Training Step: 1368... Training loss: 1.7849... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1369... Training loss: 1.5782... 0.1246 sec/batch\n", "Epoch: 3/20... 
Training Step: 1370... Training loss: 1.4696... 0.1254 sec/batch\n", "Epoch: 3/20... Training Step: 1371... Training loss: 1.5873... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1372... Training loss: 1.5114... 0.1223 sec/batch\n", "Epoch: 3/20... Training Step: 1373... Training loss: 1.4033... 0.1171 sec/batch\n", "Epoch: 3/20... Training Step: 1374... Training loss: 1.3788... 0.1273 sec/batch\n", "Epoch: 3/20... Training Step: 1375... Training loss: 1.5542... 0.1250 sec/batch\n", "Epoch: 3/20... Training Step: 1376... Training loss: 1.2138... 0.1211 sec/batch\n", "Epoch: 3/20... Training Step: 1377... Training loss: 1.3873... 0.1206 sec/batch\n", "Epoch: 3/20... Training Step: 1378... Training loss: 1.4237... 0.1236 sec/batch\n", "Epoch: 3/20... Training Step: 1379... Training loss: 1.4807... 0.1255 sec/batch\n", "Epoch: 3/20... Training Step: 1380... Training loss: 1.4350... 0.1270 sec/batch\n", "Epoch: 3/20... Training Step: 1381... Training loss: 1.3911... 0.1233 sec/batch\n", "Epoch: 3/20... Training Step: 1382... Training loss: 1.5712... 0.1264 sec/batch\n", "Epoch: 3/20... Training Step: 1383... Training loss: 1.5879... 0.1213 sec/batch\n", "Epoch: 3/20... Training Step: 1384... Training loss: 1.3152... 0.1237 sec/batch\n", "Epoch: 3/20... Training Step: 1385... Training loss: 1.3861... 0.1255 sec/batch\n", "Epoch: 3/20... Training Step: 1386... Training loss: 1.2745... 0.1213 sec/batch\n", "Epoch: 3/20... Training Step: 1387... Training loss: 1.1810... 0.1248 sec/batch\n", "Epoch: 3/20... Training Step: 1388... Training loss: 1.3882... 0.1251 sec/batch\n", "Epoch: 3/20... Training Step: 1389... Training loss: 1.5194... 0.1234 sec/batch\n", "Epoch: 3/20... Training Step: 1390... Training loss: 1.4200... 0.1217 sec/batch\n", "Epoch: 3/20... Training Step: 1391... Training loss: 1.4400... 0.1234 sec/batch\n", "Epoch: 3/20... Training Step: 1392... Training loss: 1.3359... 0.1225 sec/batch\n", "Epoch: 4/20... Training Step: 1393... 
Training loss: 1.9313... 0.1260 sec/batch\n", "Epoch: 4/20... Training Step: 1394... Training loss: 1.4922... 0.1188 sec/batch\n", "Epoch: 4/20... Training Step: 1395... Training loss: 1.4301... 0.1268 sec/batch\n", "Epoch: 4/20... Training Step: 1396... Training loss: 1.3722... 0.1196 sec/batch\n", "Epoch: 4/20... Training Step: 1397... Training loss: 1.4674... 0.1252 sec/batch\n", "Epoch: 4/20... Training Step: 1398... Training loss: 1.2833... 0.1244 sec/batch\n", "Epoch: 4/20... Training Step: 1399... Training loss: 1.4789... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1400... Training loss: 1.3207... 0.1296 sec/batch\n", "Epoch: 4/20... Training Step: 1401... Training loss: 1.2971... 0.1233 sec/batch\n", "Epoch: 4/20... Training Step: 1402... Training loss: 1.6051... 0.1176 sec/batch\n", "Epoch: 4/20... Training Step: 1403... Training loss: 1.3909... 0.1229 sec/batch\n", "Epoch: 4/20... Training Step: 1404... Training loss: 1.1811... 0.1225 sec/batch\n", "Epoch: 4/20... Training Step: 1405... Training loss: 1.6804... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1406... Training loss: 1.2042... 0.1191 sec/batch\n", "Epoch: 4/20... Training Step: 1407... Training loss: 1.3924... 0.1253 sec/batch\n", "Epoch: 4/20... Training Step: 1408... Training loss: 1.4801... 0.1210 sec/batch\n", "Epoch: 4/20... Training Step: 1409... Training loss: 1.3012... 0.1255 sec/batch\n", "Epoch: 4/20... Training Step: 1410... Training loss: 1.2288... 0.1194 sec/batch\n", "Epoch: 4/20... Training Step: 1411... Training loss: 1.3916... 0.1194 sec/batch\n", "Epoch: 4/20... Training Step: 1412... Training loss: 1.2628... 0.1217 sec/batch\n", "Epoch: 4/20... Training Step: 1413... Training loss: 1.3941... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1414... Training loss: 1.3971... 0.1188 sec/batch\n", "Epoch: 4/20... Training Step: 1415... Training loss: 1.6025... 0.1210 sec/batch\n", "Epoch: 4/20... Training Step: 1416... Training loss: 1.2473... 
0.1230 sec/batch\n", "Epoch: 4/20... Training Step: 1417... Training loss: 1.3737... 0.1197 sec/batch\n", "Epoch: 4/20... Training Step: 1418... Training loss: 1.3562... 0.1182 sec/batch\n", "Epoch: 4/20... Training Step: 1419... Training loss: 1.4493... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1420... Training loss: 1.2747... 0.1222 sec/batch\n", "Epoch: 4/20... Training Step: 1421... Training loss: 1.3746... 0.1236 sec/batch\n", "Epoch: 4/20... Training Step: 1422... Training loss: 1.3678... 0.1167 sec/batch\n", "Epoch: 4/20... Training Step: 1423... Training loss: 1.2374... 0.1255 sec/batch\n", "Epoch: 4/20... Training Step: 1424... Training loss: 1.3995... 0.1181 sec/batch\n", "Epoch: 4/20... Training Step: 1425... Training loss: 1.2729... 0.1219 sec/batch\n", "Epoch: 4/20... Training Step: 1426... Training loss: 1.2481... 0.1241 sec/batch\n", "Epoch: 4/20... Training Step: 1427... Training loss: 1.3059... 0.1253 sec/batch\n", "Epoch: 4/20... Training Step: 1428... Training loss: 1.2463... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1429... Training loss: 1.4052... 0.1252 sec/batch\n", "Epoch: 4/20... Training Step: 1430... Training loss: 1.1703... 0.1200 sec/batch\n", "Epoch: 4/20... Training Step: 1431... Training loss: 1.2630... 0.1275 sec/batch\n", "Epoch: 4/20... Training Step: 1432... Training loss: 1.5672... 0.1254 sec/batch\n", "Epoch: 4/20... Training Step: 1433... Training loss: 1.3659... 0.1217 sec/batch\n", "Epoch: 4/20... Training Step: 1434... Training loss: 1.2535... 0.1230 sec/batch\n", "Epoch: 4/20... Training Step: 1435... Training loss: 1.5014... 0.1286 sec/batch\n", "Epoch: 4/20... Training Step: 1436... Training loss: 1.0834... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1437... Training loss: 1.3498... 0.1297 sec/batch\n", "Epoch: 4/20... Training Step: 1438... Training loss: 1.1832... 0.1233 sec/batch\n", "Epoch: 4/20... Training Step: 1439... Training loss: 1.3530... 0.1220 sec/batch\n", "Epoch: 4/20... 
Training Step: 1440... Training loss: 1.3427... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1441... Training loss: 1.2966... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1442... Training loss: 1.4366... 0.1193 sec/batch\n", "Epoch: 4/20... Training Step: 1443... Training loss: 1.3389... 0.1235 sec/batch\n", "Epoch: 4/20... Training Step: 1444... Training loss: 1.4836... 0.1181 sec/batch\n", "Epoch: 4/20... Training Step: 1445... Training loss: 1.4737... 0.1209 sec/batch\n", "Epoch: 4/20... Training Step: 1446... Training loss: 1.3881... 0.1197 sec/batch\n", "Epoch: 4/20... Training Step: 1447... Training loss: 1.2272... 0.1262 sec/batch\n", "Epoch: 4/20... Training Step: 1448... Training loss: 1.4611... 0.1356 sec/batch\n", "Epoch: 4/20... Training Step: 1449... Training loss: 1.4331... 0.1266 sec/batch\n", "Epoch: 4/20... Training Step: 1450... Training loss: 1.3789... 0.1190 sec/batch\n", "Epoch: 4/20... Training Step: 1451... Training loss: 1.2747... 0.1268 sec/batch\n", "Epoch: 4/20... Training Step: 1452... Training loss: 1.2960... 0.1232 sec/batch\n", "Epoch: 4/20... Training Step: 1453... Training loss: 1.4640... 0.1207 sec/batch\n", "Epoch: 4/20... Training Step: 1454... Training loss: 1.4305... 0.1203 sec/batch\n", "Epoch: 4/20... Training Step: 1455... Training loss: 1.3445... 0.1195 sec/batch\n", "Epoch: 4/20... Training Step: 1456... Training loss: 1.3732... 0.1236 sec/batch\n", "Epoch: 4/20... Training Step: 1457... Training loss: 1.1927... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1458... Training loss: 1.5063... 0.1196 sec/batch\n", "Epoch: 4/20... Training Step: 1459... Training loss: 1.4179... 0.1225 sec/batch\n", "Epoch: 4/20... Training Step: 1460... Training loss: 1.4725... 0.1216 sec/batch\n", "Epoch: 4/20... Training Step: 1461... Training loss: 1.2833... 0.1218 sec/batch\n", "Epoch: 4/20... Training Step: 1462... Training loss: 1.3418... 0.1200 sec/batch\n", "Epoch: 4/20... Training Step: 1463... 
Training loss: 1.3958... 0.1219 sec/batch\n", "Epoch: 4/20... Training Step: 1464... Training loss: 1.3670... 0.1246 sec/batch\n", "Epoch: 4/20... Training Step: 1465... Training loss: 1.4485... 0.1194 sec/batch\n", "Epoch: 4/20... Training Step: 1466... Training loss: 1.3058... 0.1264 sec/batch\n", "Epoch: 4/20... Training Step: 1467... Training loss: 1.6422... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1468... Training loss: 1.3307... 0.1223 sec/batch\n", "Epoch: 4/20... Training Step: 1469... Training loss: 1.2994... 0.1323 sec/batch\n", "Epoch: 4/20... Training Step: 1470... Training loss: 1.4384... 0.1281 sec/batch\n", "Epoch: 4/20... Training Step: 1471... Training loss: 1.3853... 0.1251 sec/batch\n", "Epoch: 4/20... Training Step: 1472... Training loss: 1.2819... 0.1274 sec/batch\n", "Epoch: 4/20... Training Step: 1473... Training loss: 1.5100... 0.1250 sec/batch\n", "Epoch: 4/20... Training Step: 1474... Training loss: 1.3332... 0.1283 sec/batch\n", "Epoch: 4/20... Training Step: 1475... Training loss: 1.2866... 0.1289 sec/batch\n", "Epoch: 4/20... Training Step: 1476... Training loss: 1.4128... 0.1186 sec/batch\n", "Epoch: 4/20... Training Step: 1477... Training loss: 1.4052... 0.1275 sec/batch\n", "Epoch: 4/20... Training Step: 1478... Training loss: 1.4796... 0.1229 sec/batch\n", "Epoch: 4/20... Training Step: 1479... Training loss: 1.2473... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1480... Training loss: 1.5011... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1481... Training loss: 1.5194... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1482... Training loss: 1.3242... 0.1188 sec/batch\n", "Epoch: 4/20... Training Step: 1483... Training loss: 1.5245... 0.1207 sec/batch\n", "Epoch: 4/20... Training Step: 1484... Training loss: 1.6861... 0.1270 sec/batch\n", "Epoch: 4/20... Training Step: 1485... Training loss: 1.2433... 0.1218 sec/batch\n", "Epoch: 4/20... Training Step: 1486... Training loss: 1.5423... 
0.1188 sec/batch\n", "Epoch: 4/20... Training Step: 1487... Training loss: 1.3283... 0.1232 sec/batch\n", "Epoch: 4/20... Training Step: 1488... Training loss: 1.3628... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1489... Training loss: 1.7787... 0.1226 sec/batch\n", "Epoch: 4/20... Training Step: 1490... Training loss: 1.5023... 0.1201 sec/batch\n", "Epoch: 4/20... Training Step: 1491... Training loss: 1.5314... 0.1214 sec/batch\n", "Epoch: 4/20... Training Step: 1492... Training loss: 1.3412... 0.1200 sec/batch\n", "Epoch: 4/20... Training Step: 1493... Training loss: 1.4585... 0.1210 sec/batch\n", "Epoch: 4/20... Training Step: 1494... Training loss: 1.6980... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1495... Training loss: 1.6606... 0.1256 sec/batch\n", "Epoch: 4/20... Training Step: 1496... Training loss: 1.5003... 0.1248 sec/batch\n", "Epoch: 4/20... Training Step: 1497... Training loss: 1.6203... 0.1218 sec/batch\n", "Epoch: 4/20... Training Step: 1498... Training loss: 1.7408... 0.1183 sec/batch\n", "Epoch: 4/20... Training Step: 1499... Training loss: 1.6151... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1500... Training loss: 1.9297... 0.1235 sec/batch\n", "Epoch: 4/20... Training Step: 1501... Training loss: 1.6583... 0.1245 sec/batch\n", "Epoch: 4/20... Training Step: 1502... Training loss: 1.3600... 0.1203 sec/batch\n", "Epoch: 4/20... Training Step: 1503... Training loss: 1.5712... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1504... Training loss: 1.4967... 0.1227 sec/batch\n", "Epoch: 4/20... Training Step: 1505... Training loss: 1.6027... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1506... Training loss: 1.6185... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1507... Training loss: 1.5903... 0.1201 sec/batch\n", "Epoch: 4/20... Training Step: 1508... Training loss: 1.3800... 0.1210 sec/batch\n", "Epoch: 4/20... Training Step: 1509... Training loss: 1.5354... 0.1228 sec/batch\n", "Epoch: 4/20... 
Training Step: 1510... Training loss: 1.6983... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1511... Training loss: 1.4242... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1512... Training loss: 1.2859... 0.1217 sec/batch\n", "Epoch: 4/20... Training Step: 1513... Training loss: 1.5107... 0.1242 sec/batch\n", "Epoch: 4/20... Training Step: 1514... Training loss: 1.4637... 0.1193 sec/batch\n", "Epoch: 4/20... Training Step: 1515... Training loss: 1.6800... 0.1193 sec/batch\n", "Epoch: 4/20... Training Step: 1516... Training loss: 1.5276... 0.1198 sec/batch\n", "Epoch: 4/20... Training Step: 1517... Training loss: 1.5649... 0.1181 sec/batch\n", "Epoch: 4/20... Training Step: 1518... Training loss: 1.2967... 0.1233 sec/batch\n", "Epoch: 4/20... Training Step: 1519... Training loss: 1.3368... 0.1250 sec/batch\n", "Epoch: 4/20... Training Step: 1520... Training loss: 1.4736... 0.1232 sec/batch\n", "Epoch: 4/20... Training Step: 1521... Training loss: 1.6530... 0.1236 sec/batch\n", "Epoch: 4/20... Training Step: 1522... Training loss: 1.4966... 0.1230 sec/batch\n", "Epoch: 4/20... Training Step: 1523... Training loss: 1.7429... 0.1173 sec/batch\n", "Epoch: 4/20... Training Step: 1524... Training loss: 1.5686... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1525... Training loss: 1.3538... 0.1220 sec/batch\n", "Epoch: 4/20... Training Step: 1526... Training loss: 1.5236... 0.1212 sec/batch\n", "Epoch: 4/20... Training Step: 1527... Training loss: 1.2845... 0.1264 sec/batch\n", "Epoch: 4/20... Training Step: 1528... Training loss: 1.2576... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1529... Training loss: 1.2262... 0.1259 sec/batch\n", "Epoch: 4/20... Training Step: 1530... Training loss: 1.3750... 0.1197 sec/batch\n", "Epoch: 4/20... Training Step: 1531... Training loss: 1.2956... 0.1232 sec/batch\n", "Epoch: 4/20... Training Step: 1532... Training loss: 1.3355... 0.1191 sec/batch\n", "Epoch: 4/20... Training Step: 1533... 
Training loss: 1.3361... 0.1202 sec/batch\n", "Epoch: 4/20... Training Step: 1534... Training loss: 1.3512... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1535... Training loss: 1.1822... 0.1271 sec/batch\n", "Epoch: 4/20... Training Step: 1536... Training loss: 1.4463... 0.1209 sec/batch\n", "Epoch: 4/20... Training Step: 1537... Training loss: 1.4729... 0.1215 sec/batch\n", "Epoch: 4/20... Training Step: 1538... Training loss: 1.3268... 0.1181 sec/batch\n", "Epoch: 4/20... Training Step: 1539... Training loss: 1.5076... 0.1230 sec/batch\n", "Epoch: 4/20... Training Step: 1540... Training loss: 1.2768... 0.1227 sec/batch\n", "Epoch: 4/20... Training Step: 1541... Training loss: 1.3413... 0.1303 sec/batch\n", "Epoch: 4/20... Training Step: 1542... Training loss: 1.6151... 0.1289 sec/batch\n", "Epoch: 4/20... Training Step: 1543... Training loss: 1.4276... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1544... Training loss: 1.5594... 0.1185 sec/batch\n", "Epoch: 4/20... Training Step: 1545... Training loss: 1.6299... 0.1234 sec/batch\n", "Epoch: 4/20... Training Step: 1546... Training loss: 1.4598... 0.1215 sec/batch\n", "Epoch: 4/20... Training Step: 1547... Training loss: 1.5796... 0.1202 sec/batch\n", "Epoch: 4/20... Training Step: 1548... Training loss: 1.2760... 0.1241 sec/batch\n", "Epoch: 4/20... Training Step: 1549... Training loss: 1.3051... 0.1261 sec/batch\n", "Epoch: 4/20... Training Step: 1550... Training loss: 1.3215... 0.1242 sec/batch\n", "Epoch: 4/20... Training Step: 1551... Training loss: 1.3592... 0.1184 sec/batch\n", "Epoch: 4/20... Training Step: 1552... Training loss: 1.3590... 0.1255 sec/batch\n", "Epoch: 4/20... Training Step: 1553... Training loss: 1.4959... 0.1265 sec/batch\n", "Epoch: 4/20... Training Step: 1554... Training loss: 1.4831... 0.1190 sec/batch\n", "Epoch: 4/20... Training Step: 1555... Training loss: 1.5494... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1556... Training loss: 1.2590... 
0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1557... Training loss: 1.4489... 0.1272 sec/batch\n", "Epoch: 4/20... Training Step: 1558... Training loss: 1.3497... 0.1266 sec/batch\n", "Epoch: 4/20... Training Step: 1559... Training loss: 1.3712... 0.1186 sec/batch\n", "Epoch: 4/20... Training Step: 1560... Training loss: 1.5263... 0.1200 sec/batch\n", "Epoch: 4/20... Training Step: 1561... Training loss: 1.4972... 0.1242 sec/batch\n", "Epoch: 4/20... Training Step: 1562... Training loss: 1.5029... 0.1242 sec/batch\n", "Epoch: 4/20... Training Step: 1563... Training loss: 1.3780... 0.1257 sec/batch\n", "Epoch: 4/20... Training Step: 1564... Training loss: 1.5499... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1565... Training loss: 1.3015... 0.1164 sec/batch\n", "Epoch: 4/20... Training Step: 1566... Training loss: 1.4424... 0.1207 sec/batch\n", "Epoch: 4/20... Training Step: 1567... Training loss: 1.4742... 0.1179 sec/batch\n", "Epoch: 4/20... Training Step: 1568... Training loss: 1.2731... 0.1262 sec/batch\n", "Epoch: 4/20... Training Step: 1569... Training loss: 1.2498... 0.1218 sec/batch\n", "Epoch: 4/20... Training Step: 1570... Training loss: 1.6188... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1571... Training loss: 1.3378... 0.1189 sec/batch\n", "Epoch: 4/20... Training Step: 1572... Training loss: 1.5607... 0.1239 sec/batch\n", "Epoch: 4/20... Training Step: 1573... Training loss: 1.3539... 0.1270 sec/batch\n", "Epoch: 4/20... Training Step: 1574... Training loss: 1.6304... 0.1174 sec/batch\n", "Epoch: 4/20... Training Step: 1575... Training loss: 1.4477... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1576... Training loss: 1.4303... 0.1214 sec/batch\n", "Epoch: 4/20... Training Step: 1577... Training loss: 1.6332... 0.1256 sec/batch\n", "Epoch: 4/20... Training Step: 1578... Training loss: 1.4556... 0.1245 sec/batch\n", "Epoch: 4/20... Training Step: 1579... Training loss: 1.5490... 0.1233 sec/batch\n", "Epoch: 4/20... 
Training Step: 1580... Training loss: 1.2421... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1581... Training loss: 1.4559... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1582... Training loss: 1.2966... 0.1234 sec/batch\n", "Epoch: 4/20... Training Step: 1583... Training loss: 1.2735... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1584... Training loss: 1.5259... 0.1202 sec/batch\n", "Epoch: 4/20... Training Step: 1585... Training loss: 1.4117... 0.1178 sec/batch\n", "Epoch: 4/20... Training Step: 1586... Training loss: 1.4221... 0.1234 sec/batch\n", "Epoch: 4/20... Training Step: 1587... Training loss: 1.5599... 0.1219 sec/batch\n", "Epoch: 4/20... Training Step: 1588... Training loss: 1.3522... 0.1262 sec/batch\n", "Epoch: 4/20... Training Step: 1589... Training loss: 1.2736... 0.1309 sec/batch\n", "Epoch: 4/20... Training Step: 1590... Training loss: 1.4886... 0.1279 sec/batch\n", "Epoch: 4/20... Training Step: 1591... Training loss: 1.2435... 0.1349 sec/batch\n", "Epoch: 4/20... Training Step: 1592... Training loss: 1.2611... 0.1300 sec/batch\n", "Epoch: 4/20... Training Step: 1593... Training loss: 1.3498... 0.1319 sec/batch\n", "Epoch: 4/20... Training Step: 1594... Training loss: 1.3829... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1595... Training loss: 1.3758... 0.1255 sec/batch\n", "Epoch: 4/20... Training Step: 1596... Training loss: 1.5544... 0.1204 sec/batch\n", "Epoch: 4/20... Training Step: 1597... Training loss: 1.3693... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1598... Training loss: 1.2057... 0.1349 sec/batch\n", "Epoch: 4/20... Training Step: 1599... Training loss: 1.2627... 0.1315 sec/batch\n", "Epoch: 4/20... Training Step: 1600... Training loss: 1.4666... 0.1432 sec/batch\n", "Epoch: 4/20... Training Step: 1601... Training loss: 1.4690... 0.1338 sec/batch\n", "Epoch: 4/20... Training Step: 1602... Training loss: 1.3161... 0.1553 sec/batch\n", "Epoch: 4/20... Training Step: 1603... 
Training loss: 1.0884... 0.1282 sec/batch\n", "Epoch: 4/20... Training Step: 1604... Training loss: 1.4143... 0.1227 sec/batch\n", "Epoch: 4/20... Training Step: 1605... Training loss: 1.5037... 0.1251 sec/batch\n", "Epoch: 4/20... Training Step: 1606... Training loss: 1.4227... 0.1244 sec/batch\n", "Epoch: 4/20... Training Step: 1607... Training loss: 1.5015... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1608... Training loss: 1.3333... 0.1235 sec/batch\n", "Epoch: 4/20... Training Step: 1609... Training loss: 1.5212... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1610... Training loss: 1.2993... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1611... Training loss: 1.5958... 0.1299 sec/batch\n", "Epoch: 4/20... Training Step: 1612... Training loss: 1.3819... 0.1300 sec/batch\n", "Epoch: 4/20... Training Step: 1613... Training loss: 1.2495... 0.1296 sec/batch\n", "Epoch: 4/20... Training Step: 1614... Training loss: 1.6065... 0.1306 sec/batch\n", "Epoch: 4/20... Training Step: 1615... Training loss: 1.6064... 0.1313 sec/batch\n", "Epoch: 4/20... Training Step: 1616... Training loss: 1.5914... 0.1289 sec/batch\n", "Epoch: 4/20... Training Step: 1617... Training loss: 1.3934... 0.1289 sec/batch\n", "Epoch: 4/20... Training Step: 1618... Training loss: 1.6287... 0.1289 sec/batch\n", "Epoch: 4/20... Training Step: 1619... Training loss: 1.5575... 0.1284 sec/batch\n", "Epoch: 4/20... Training Step: 1620... Training loss: 1.2240... 0.1257 sec/batch\n", "Epoch: 4/20... Training Step: 1621... Training loss: 1.4031... 0.1268 sec/batch\n", "Epoch: 4/20... Training Step: 1622... Training loss: 1.3496... 0.1323 sec/batch\n", "Epoch: 4/20... Training Step: 1623... Training loss: 1.3593... 0.1295 sec/batch\n", "Epoch: 4/20... Training Step: 1624... Training loss: 1.3055... 0.1310 sec/batch\n", "Epoch: 4/20... Training Step: 1625... Training loss: 1.6255... 0.1297 sec/batch\n", "Epoch: 4/20... Training Step: 1626... Training loss: 1.3199... 
0.1245 sec/batch\n", "Epoch: 4/20... Training Step: 1627... Training loss: 1.6438... 0.1193 sec/batch\n", "Epoch: 4/20... Training Step: 1628... Training loss: 1.3492... 0.1222 sec/batch\n", "Epoch: 4/20... Training Step: 1629... Training loss: 1.7020... 0.1184 sec/batch\n", "Epoch: 4/20... Training Step: 1630... Training loss: 1.3203... 0.1252 sec/batch\n", "Epoch: 4/20... Training Step: 1631... Training loss: 1.4881... 0.1253 sec/batch\n", "Epoch: 4/20... Training Step: 1632... Training loss: 1.5097... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1633... Training loss: 1.4173... 0.1257 sec/batch\n", "Epoch: 4/20... Training Step: 1634... Training loss: 1.3100... 0.1241 sec/batch\n", "Epoch: 4/20... Training Step: 1635... Training loss: 1.6259... 0.1226 sec/batch\n", "Epoch: 4/20... Training Step: 1636... Training loss: 1.5558... 0.1209 sec/batch\n", "Epoch: 4/20... Training Step: 1637... Training loss: 1.5569... 0.1200 sec/batch\n", "Epoch: 4/20... Training Step: 1638... Training loss: 1.3143... 0.1193 sec/batch\n", "Epoch: 4/20... Training Step: 1639... Training loss: 1.2947... 0.1234 sec/batch\n", "Epoch: 4/20... Training Step: 1640... Training loss: 1.4623... 0.1248 sec/batch\n", "Epoch: 4/20... Training Step: 1641... Training loss: 1.3586... 0.1394 sec/batch\n", "Epoch: 4/20... Training Step: 1642... Training loss: 1.3476... 0.1464 sec/batch\n", "Epoch: 4/20... Training Step: 1643... Training loss: 1.4967... 0.1290 sec/batch\n", "Epoch: 4/20... Training Step: 1644... Training loss: 1.3591... 0.1230 sec/batch\n", "Epoch: 4/20... Training Step: 1645... Training loss: 1.2847... 0.1208 sec/batch\n", "Epoch: 4/20... Training Step: 1646... Training loss: 1.5421... 0.1176 sec/batch\n", "Epoch: 4/20... Training Step: 1647... Training loss: 1.3966... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1648... Training loss: 1.3962... 0.1202 sec/batch\n", "Epoch: 4/20... Training Step: 1649... Training loss: 1.4429... 0.1268 sec/batch\n", "Epoch: 4/20... 
Training Step: 1650... Training loss: 1.3419... 0.1183 sec/batch\n", "Epoch: 4/20... Training Step: 1651... Training loss: 1.3109... 0.1250 sec/batch\n", "Epoch: 4/20... Training Step: 1652... Training loss: 1.4374... 0.1219 sec/batch\n", "Epoch: 4/20... Training Step: 1653... Training loss: 1.3746... 0.1232 sec/batch\n", "Epoch: 4/20... Training Step: 1654... Training loss: 1.5243... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1655... Training loss: 1.4850... 0.1235 sec/batch\n", "Epoch: 4/20... Training Step: 1656... Training loss: 1.5935... 0.1210 sec/batch\n", "Epoch: 4/20... Training Step: 1657... Training loss: 1.5334... 0.1215 sec/batch\n", "Epoch: 4/20... Training Step: 1658... Training loss: 1.4143... 0.1209 sec/batch\n", "Epoch: 4/20... Training Step: 1659... Training loss: 1.5740... 0.1208 sec/batch\n", "Epoch: 4/20... Training Step: 1660... Training loss: 1.5156... 0.1212 sec/batch\n", "Epoch: 4/20... Training Step: 1661... Training loss: 1.4966... 0.1257 sec/batch\n", "Epoch: 4/20... Training Step: 1662... Training loss: 1.5426... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1663... Training loss: 1.4888... 0.1230 sec/batch\n", "Epoch: 4/20... Training Step: 1664... Training loss: 1.5367... 0.1220 sec/batch\n", "Epoch: 4/20... Training Step: 1665... Training loss: 1.5524... 0.1247 sec/batch\n", "Epoch: 4/20... Training Step: 1666... Training loss: 1.4987... 0.1335 sec/batch\n", "Epoch: 4/20... Training Step: 1667... Training loss: 1.6245... 0.1233 sec/batch\n", "Epoch: 4/20... Training Step: 1668... Training loss: 1.4191... 0.1230 sec/batch\n", "Epoch: 4/20... Training Step: 1669... Training loss: 1.4755... 0.1256 sec/batch\n", "Epoch: 4/20... Training Step: 1670... Training loss: 1.7585... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1671... Training loss: 1.4467... 0.1227 sec/batch\n", "Epoch: 4/20... Training Step: 1672... Training loss: 1.3123... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1673... 
Training loss: 1.3756... 0.1251 sec/batch\n", "Epoch: 4/20... Training Step: 1674... Training loss: 1.2536... 0.1206 sec/batch\n", "Epoch: 4/20... Training Step: 1675... Training loss: 1.3763... 0.1223 sec/batch\n", "Epoch: 4/20... Training Step: 1676... Training loss: 1.4851... 0.1184 sec/batch\n", "Epoch: 4/20... Training Step: 1677... Training loss: 1.2984... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1678... Training loss: 1.4871... 0.1206 sec/batch\n", "Epoch: 4/20... Training Step: 1679... Training loss: 1.3846... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1680... Training loss: 1.5179... 0.1239 sec/batch\n", "Epoch: 4/20... Training Step: 1681... Training loss: 1.4800... 0.1216 sec/batch\n", "Epoch: 4/20... Training Step: 1682... Training loss: 1.4224... 0.1206 sec/batch\n", "Epoch: 4/20... Training Step: 1683... Training loss: 1.3142... 0.1271 sec/batch\n", "Epoch: 4/20... Training Step: 1684... Training loss: 1.2903... 0.1236 sec/batch\n", "Epoch: 4/20... Training Step: 1685... Training loss: 1.2910... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1686... Training loss: 1.4233... 0.1226 sec/batch\n", "Epoch: 4/20... Training Step: 1687... Training loss: 1.3648... 0.1250 sec/batch\n", "Epoch: 4/20... Training Step: 1688... Training loss: 1.6586... 0.1287 sec/batch\n", "Epoch: 4/20... Training Step: 1689... Training loss: 1.3122... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1690... Training loss: 1.3081... 0.1304 sec/batch\n", "Epoch: 4/20... Training Step: 1691... Training loss: 1.3743... 0.1287 sec/batch\n", "Epoch: 4/20... Training Step: 1692... Training loss: 1.3375... 0.1274 sec/batch\n", "Epoch: 4/20... Training Step: 1693... Training loss: 1.3612... 0.1261 sec/batch\n", "Epoch: 4/20... Training Step: 1694... Training loss: 1.2892... 0.1288 sec/batch\n", "Epoch: 4/20... Training Step: 1695... Training loss: 1.1671... 0.1340 sec/batch\n", "Epoch: 4/20... Training Step: 1696... Training loss: 1.5735... 
0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1697... Training loss: 1.3152... 0.1234 sec/batch\n", "Epoch: 4/20... Training Step: 1698... Training loss: 1.4739... 0.1179 sec/batch\n", "Epoch: 4/20... Training Step: 1699... Training loss: 1.3768... 0.1204 sec/batch\n", "Epoch: 4/20... Training Step: 1700... Training loss: 1.8189... 0.1197 sec/batch\n", "Epoch: 4/20... Training Step: 1701... Training loss: 1.4422... 0.1188 sec/batch\n", "Epoch: 4/20... Training Step: 1702... Training loss: 1.4908... 0.1234 sec/batch\n", "Epoch: 4/20... Training Step: 1703... Training loss: 1.3663... 0.1165 sec/batch\n", "Epoch: 4/20... Training Step: 1704... Training loss: 1.2265... 0.1173 sec/batch\n", "Epoch: 4/20... Training Step: 1705... Training loss: 1.3273... 0.1172 sec/batch\n", "Epoch: 4/20... Training Step: 1706... Training loss: 1.2965... 0.1192 sec/batch\n", "Epoch: 4/20... Training Step: 1707... Training loss: 1.1678... 0.1235 sec/batch\n", "Epoch: 4/20... Training Step: 1708... Training loss: 1.3140... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1709... Training loss: 1.4159... 0.1265 sec/batch\n", "Epoch: 4/20... Training Step: 1710... Training loss: 1.2664... 0.1191 sec/batch\n", "Epoch: 4/20... Training Step: 1711... Training loss: 1.3747... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1712... Training loss: 1.2646... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1713... Training loss: 1.2504... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1714... Training loss: 1.5354... 0.1215 sec/batch\n", "Epoch: 4/20... Training Step: 1715... Training loss: 1.3490... 0.1257 sec/batch\n", "Epoch: 4/20... Training Step: 1716... Training loss: 1.2395... 0.1245 sec/batch\n", "Epoch: 4/20... Training Step: 1717... Training loss: 1.2260... 0.1177 sec/batch\n", "Epoch: 4/20... Training Step: 1718... Training loss: 1.1965... 0.1230 sec/batch\n", "Epoch: 4/20... Training Step: 1719... Training loss: 1.4599... 0.1239 sec/batch\n", "Epoch: 4/20... 
Training Step: 1720... Training loss: 1.3538... 0.1203 sec/batch\n", "Epoch: 4/20... Training Step: 1721... Training loss: 1.4356... 0.1214 sec/batch\n", "Epoch: 4/20... Training Step: 1722... Training loss: 1.2716... 0.1216 sec/batch\n", "Epoch: 4/20... Training Step: 1723... Training loss: 1.3971... 0.1220 sec/batch\n", "Epoch: 4/20... Training Step: 1724... Training loss: 1.3930... 0.1184 sec/batch\n", "Epoch: 4/20... Training Step: 1725... Training loss: 1.3373... 0.1242 sec/batch\n", "Epoch: 4/20... Training Step: 1726... Training loss: 1.3501... 0.1200 sec/batch\n", "Epoch: 4/20... Training Step: 1727... Training loss: 1.5114... 0.1196 sec/batch\n", "Epoch: 4/20... Training Step: 1728... Training loss: 1.3357... 0.1212 sec/batch\n", "Epoch: 4/20... Training Step: 1729... Training loss: 1.4122... 0.1229 sec/batch\n", "Epoch: 4/20... Training Step: 1730... Training loss: 1.3890... 0.1264 sec/batch\n", "Epoch: 4/20... Training Step: 1731... Training loss: 1.4253... 0.1228 sec/batch\n", "Epoch: 4/20... Training Step: 1732... Training loss: 1.2306... 0.1203 sec/batch\n", "Epoch: 4/20... Training Step: 1733... Training loss: 1.2428... 0.1333 sec/batch\n", "Epoch: 4/20... Training Step: 1734... Training loss: 1.4867... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1735... Training loss: 1.1870... 0.1257 sec/batch\n", "Epoch: 4/20... Training Step: 1736... Training loss: 1.4857... 0.1272 sec/batch\n", "Epoch: 4/20... Training Step: 1737... Training loss: 1.2644... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1738... Training loss: 1.2092... 0.1258 sec/batch\n", "Epoch: 4/20... Training Step: 1739... Training loss: 1.0920... 0.1264 sec/batch\n", "Epoch: 4/20... Training Step: 1740... Training loss: 1.6339... 0.1245 sec/batch\n", "Epoch: 4/20... Training Step: 1741... Training loss: 1.2019... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1742... Training loss: 1.3660... 0.1203 sec/batch\n", "Epoch: 4/20... Training Step: 1743... 
Training loss: 1.2780... 0.1227 sec/batch\n", "Epoch: 4/20... Training Step: 1744... Training loss: 1.1986... 0.1247 sec/batch\n", "Epoch: 4/20... Training Step: 1745... Training loss: 1.2250... 0.1214 sec/batch\n", "Epoch: 4/20... Training Step: 1746... Training loss: 1.0924... 0.1207 sec/batch\n", "Epoch: 4/20... Training Step: 1747... Training loss: 1.4161... 0.1241 sec/batch\n", "Epoch: 4/20... Training Step: 1748... Training loss: 1.2304... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1749... Training loss: 1.2339... 0.1246 sec/batch\n", "Epoch: 4/20... Training Step: 1750... Training loss: 1.3953... 0.1239 sec/batch\n", "Epoch: 4/20... Training Step: 1751... Training loss: 1.5165... 0.1247 sec/batch\n", "Epoch: 4/20... Training Step: 1752... Training loss: 1.0447... 0.1209 sec/batch\n", "Epoch: 4/20... Training Step: 1753... Training loss: 1.5157... 0.1246 sec/batch\n", "Epoch: 4/20... Training Step: 1754... Training loss: 1.4152... 0.1217 sec/batch\n", "Epoch: 4/20... Training Step: 1755... Training loss: 1.2556... 0.1244 sec/batch\n", "Epoch: 4/20... Training Step: 1756... Training loss: 1.3216... 0.1244 sec/batch\n", "Epoch: 4/20... Training Step: 1757... Training loss: 1.2850... 0.1195 sec/batch\n", "Epoch: 4/20... Training Step: 1758... Training loss: 1.3210... 0.1210 sec/batch\n", "Epoch: 4/20... Training Step: 1759... Training loss: 1.2160... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1760... Training loss: 1.4533... 0.1241 sec/batch\n", "Epoch: 4/20... Training Step: 1761... Training loss: 1.4620... 0.1244 sec/batch\n", "Epoch: 4/20... Training Step: 1762... Training loss: 1.3681... 0.1226 sec/batch\n", "Epoch: 4/20... Training Step: 1763... Training loss: 1.3665... 0.1221 sec/batch\n", "Epoch: 4/20... Training Step: 1764... Training loss: 1.5026... 0.1198 sec/batch\n", "Epoch: 4/20... Training Step: 1765... Training loss: 1.4163... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1766... Training loss: 1.5014... 
0.1233 sec/batch\n", "Epoch: 4/20... Training Step: 1767... Training loss: 1.4156... 0.1220 sec/batch\n", "Epoch: 4/20... Training Step: 1768... Training loss: 1.3675... 0.1246 sec/batch\n", "Epoch: 4/20... Training Step: 1769... Training loss: 1.4033... 0.1246 sec/batch\n", "Epoch: 4/20... Training Step: 1770... Training loss: 1.4267... 0.1186 sec/batch\n", "Epoch: 4/20... Training Step: 1771... Training loss: 1.3637... 0.1161 sec/batch\n", "Epoch: 4/20... Training Step: 1772... Training loss: 1.5148... 0.1200 sec/batch\n", "Epoch: 4/20... Training Step: 1773... Training loss: 1.2203... 0.1197 sec/batch\n", "Epoch: 4/20... Training Step: 1774... Training loss: 1.2130... 0.1205 sec/batch\n", "Epoch: 4/20... Training Step: 1775... Training loss: 1.3131... 0.1195 sec/batch\n", "Epoch: 4/20... Training Step: 1776... Training loss: 1.3942... 0.1196 sec/batch\n", "Epoch: 4/20... Training Step: 1777... Training loss: 1.2957... 0.1184 sec/batch\n", "Epoch: 4/20... Training Step: 1778... Training loss: 1.3659... 0.1243 sec/batch\n", "Epoch: 4/20... Training Step: 1779... Training loss: 1.3520... 0.1225 sec/batch\n", "Epoch: 4/20... Training Step: 1780... Training loss: 1.2150... 0.1190 sec/batch\n", "Epoch: 4/20... Training Step: 1781... Training loss: 1.2827... 0.1175 sec/batch\n", "Epoch: 4/20... Training Step: 1782... Training loss: 1.3544... 0.1189 sec/batch\n", "Epoch: 4/20... Training Step: 1783... Training loss: 1.2194... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1784... Training loss: 1.2046... 0.1239 sec/batch\n", "Epoch: 4/20... Training Step: 1785... Training loss: 1.2055... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1786... Training loss: 1.2770... 0.1193 sec/batch\n", "Epoch: 4/20... Training Step: 1787... Training loss: 1.2163... 0.1212 sec/batch\n", "Epoch: 4/20... Training Step: 1788... Training loss: 1.5733... 0.1216 sec/batch\n", "Epoch: 4/20... Training Step: 1789... Training loss: 1.2982... 0.1211 sec/batch\n", "Epoch: 4/20... 
Training Step: 1790... Training loss: 1.2508... 0.1250 sec/batch\n", "Epoch: 4/20... Training Step: 1791... Training loss: 1.3930... 0.1236 sec/batch\n", "Epoch: 4/20... Training Step: 1792... Training loss: 1.2744... 0.1216 sec/batch\n", "Epoch: 4/20... Training Step: 1793... Training loss: 1.2828... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1794... Training loss: 1.2882... 0.1256 sec/batch\n", "Epoch: 4/20... Training Step: 1795... Training loss: 1.1593... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1796... Training loss: 1.3708... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1797... Training loss: 1.2979... 0.1220 sec/batch\n", "Epoch: 4/20... Training Step: 1798... Training loss: 1.6349... 0.1223 sec/batch\n", "Epoch: 4/20... Training Step: 1799... Training loss: 1.3745... 0.1240 sec/batch\n", "Epoch: 4/20... Training Step: 1800... Training loss: 1.6144... 0.1219 sec/batch\n", "Epoch: 4/20... Training Step: 1801... Training loss: 1.4490... 0.1205 sec/batch\n", "Epoch: 4/20... Training Step: 1802... Training loss: 1.3823... 0.1222 sec/batch\n", "Epoch: 4/20... Training Step: 1803... Training loss: 1.2400... 0.1209 sec/batch\n", "Epoch: 4/20... Training Step: 1804... Training loss: 1.4599... 0.1212 sec/batch\n", "Epoch: 4/20... Training Step: 1805... Training loss: 1.5113... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1806... Training loss: 1.4261... 0.1172 sec/batch\n", "Epoch: 4/20... Training Step: 1807... Training loss: 1.6043... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1808... Training loss: 1.6254... 0.1239 sec/batch\n", "Epoch: 4/20... Training Step: 1809... Training loss: 1.6262... 0.1234 sec/batch\n", "Epoch: 4/20... Training Step: 1810... Training loss: 1.2339... 0.1252 sec/batch\n", "Epoch: 4/20... Training Step: 1811... Training loss: 1.4666... 0.1193 sec/batch\n", "Epoch: 4/20... Training Step: 1812... Training loss: 1.1788... 0.1275 sec/batch\n", "Epoch: 4/20... Training Step: 1813... 
Training loss: 1.5304... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1814... Training loss: 1.4188... 0.1226 sec/batch\n", "Epoch: 4/20... Training Step: 1815... Training loss: 1.5319... 0.1204 sec/batch\n", "Epoch: 4/20... Training Step: 1816... Training loss: 1.5240... 0.1202 sec/batch\n", "Epoch: 4/20... Training Step: 1817... Training loss: 1.3966... 0.1207 sec/batch\n", "Epoch: 4/20... Training Step: 1818... Training loss: 1.3632... 0.1177 sec/batch\n", "Epoch: 4/20... Training Step: 1819... Training loss: 1.4918... 0.1213 sec/batch\n", "Epoch: 4/20... Training Step: 1820... Training loss: 1.4537... 0.1198 sec/batch\n", "Epoch: 4/20... Training Step: 1821... Training loss: 1.2203... 0.1245 sec/batch\n", "Epoch: 4/20... Training Step: 1822... Training loss: 1.4192... 0.1237 sec/batch\n", "Epoch: 4/20... Training Step: 1823... Training loss: 1.6875... 0.1224 sec/batch\n", "Epoch: 4/20... Training Step: 1824... Training loss: 1.3888... 0.1219 sec/batch\n", "Epoch: 4/20... Training Step: 1825... Training loss: 1.7070... 0.1238 sec/batch\n", "Epoch: 4/20... Training Step: 1826... Training loss: 1.5605... 0.1244 sec/batch\n", "Epoch: 4/20... Training Step: 1827... Training loss: 1.2419... 0.1290 sec/batch\n", "Epoch: 4/20... Training Step: 1828... Training loss: 1.3892... 0.1277 sec/batch\n", "Epoch: 4/20... Training Step: 1829... Training loss: 1.3549... 0.1251 sec/batch\n", "Epoch: 4/20... Training Step: 1830... Training loss: 1.6378... 0.1176 sec/batch\n", "Epoch: 4/20... Training Step: 1831... Training loss: 1.6030... 0.1207 sec/batch\n", "Epoch: 4/20... Training Step: 1832... Training loss: 1.6696... 0.1241 sec/batch\n", "Epoch: 4/20... Training Step: 1833... Training loss: 1.4242... 0.1178 sec/batch\n", "Epoch: 4/20... Training Step: 1834... Training loss: 1.3844... 0.1257 sec/batch\n", "Epoch: 4/20... Training Step: 1835... Training loss: 1.5733... 0.1199 sec/batch\n", "Epoch: 4/20... Training Step: 1836... Training loss: 1.3559... 
0.1257 sec/batch\n", "Epoch: 4/20... Training Step: 1837... Training loss: 1.4308... 0.1295 sec/batch\n", "Epoch: 4/20... Training Step: 1838... Training loss: 1.2822... 0.1288 sec/batch\n", "Epoch: 4/20... Training Step: 1839... Training loss: 1.3567... 0.1241 sec/batch\n", "Epoch: 4/20... Training Step: 1840... Training loss: 1.1859... 0.1205 sec/batch\n", "Epoch: 4/20... Training Step: 1841... Training loss: 1.3348... 0.1231 sec/batch\n", "Epoch: 4/20... Training Step: 1842... Training loss: 1.4622... 0.1269 sec/batch\n", "Epoch: 4/20... Training Step: 1843... Training loss: 1.3571... 0.1211 sec/batch\n", "Epoch: 4/20... Training Step: 1844... Training loss: 1.4487... 0.1246 sec/batch\n", "Epoch: 4/20... Training Step: 1845... Training loss: 1.2888... 0.1223 sec/batch\n", "Epoch: 4/20... Training Step: 1846... Training loss: 1.6262... 0.1220 sec/batch\n", "Epoch: 4/20... Training Step: 1847... Training loss: 1.4500... 0.1233 sec/batch\n", "Epoch: 4/20... Training Step: 1848... Training loss: 1.2393... 0.1183 sec/batch\n", "Epoch: 4/20... Training Step: 1849... Training loss: 1.2943... 0.1254 sec/batch\n", "Epoch: 4/20... Training Step: 1850... Training loss: 1.2009... 0.1254 sec/batch\n", "Epoch: 4/20... Training Step: 1851... Training loss: 1.2055... 0.1217 sec/batch\n", "Epoch: 4/20... Training Step: 1852... Training loss: 1.3902... 0.1260 sec/batch\n", "Epoch: 4/20... Training Step: 1853... Training loss: 1.4751... 0.1182 sec/batch\n", "Epoch: 4/20... Training Step: 1854... Training loss: 1.4420... 0.1227 sec/batch\n", "Epoch: 4/20... Training Step: 1855... Training loss: 1.4047... 0.1219 sec/batch\n", "Epoch: 4/20... Training Step: 1856... Training loss: 1.2511... 0.1212 sec/batch\n", "Epoch: 5/20... Training Step: 1857... Training loss: 1.7105... 0.1221 sec/batch\n", "Epoch: 5/20... Training Step: 1858... Training loss: 1.4392... 0.1208 sec/batch\n", "Epoch: 5/20... Training Step: 1859... Training loss: 1.3687... 0.1208 sec/batch\n", "Epoch: 5/20... 
Training Step: 1860... Training loss: 1.3123... 0.1208 sec/batch\n", "Epoch: 5/20... Training Step: 1861... Training loss: 1.4527... 0.1202 sec/batch\n", "Epoch: 5/20... Training Step: 1862... Training loss: 1.1734... 0.1185 sec/batch\n", "Epoch: 5/20... Training Step: 1863... Training loss: 1.4351... 0.1180 sec/batch\n", "Epoch: 5/20... Training Step: 1864... Training loss: 1.2495... 0.1229 sec/batch\n", "Epoch: 5/20... Training Step: 1865... Training loss: 1.2998... 0.1207 sec/batch\n", "Epoch: 5/20... Training Step: 1866... Training loss: 1.4686... 0.1215 sec/batch\n", "Epoch: 5/20... Training Step: 1867... Training loss: 1.3448... 0.1185 sec/batch\n", "Epoch: 5/20... Training Step: 1868... Training loss: 1.1075... 0.1214 sec/batch\n", "Epoch: 5/20... Training Step: 1869... Training loss: 1.5307... 0.1251 sec/batch\n", "Epoch: 5/20... Training Step: 1870... Training loss: 1.0672... 0.1218 sec/batch\n", "Epoch: 5/20... Training Step: 1871... Training loss: 1.3289... 0.1217 sec/batch\n", "Epoch: 5/20... Training Step: 1872... Training loss: 1.4295... 0.1224 sec/batch\n", "Epoch: 5/20... Training Step: 1873... Training loss: 1.2210... 0.1206 sec/batch\n", "Epoch: 5/20... Training Step: 1874... Training loss: 1.1445... 0.1275 sec/batch\n", "Epoch: 5/20... Training Step: 1875... Training loss: 1.3625... 0.1205 sec/batch\n", "Epoch: 5/20... Training Step: 1876... Training loss: 1.1365... 0.1285 sec/batch\n", "Epoch: 5/20... Training Step: 1877... Training loss: 1.4231... 0.1338 sec/batch\n", "Epoch: 5/20... Training Step: 1878... Training loss: 1.2400... 0.1332 sec/batch\n", "Epoch: 5/20... Training Step: 1879... Training loss: 1.4825... 0.1294 sec/batch\n", "Epoch: 5/20... Training Step: 1880... Training loss: 1.2371... 0.1276 sec/batch\n", "Epoch: 5/20... Training Step: 1881... Training loss: 1.3419... 0.1256 sec/batch\n", "Epoch: 5/20... Training Step: 1882... Training loss: 1.2379... 0.1311 sec/batch\n", "Epoch: 5/20... Training Step: 1883... 
Training loss: 1.4356... 0.1354 sec/batch\n", "Epoch: 5/20... Training Step: 1884... Training loss: 1.1254... 0.1272 sec/batch\n", "Epoch: 5/20... Training Step: 1885... Training loss: 1.2998... 0.1333 sec/batch\n", "Epoch: 5/20... Training Step: 1886... Training loss: 1.3118... 0.1359 sec/batch\n", "Epoch: 5/20... Training Step: 1887... Training loss: 1.1696... 0.1302 sec/batch\n", "Epoch: 5/20... Training Step: 1888... Training loss: 1.2502... 0.1343 sec/batch\n", "Epoch: 5/20... Training Step: 1889... Training loss: 1.1884... 0.1181 sec/batch\n", "Epoch: 5/20... Training Step: 1890... Training loss: 1.1185... 0.1325 sec/batch\n", "Epoch: 5/20... Training Step: 1891... Training loss: 1.1801... 0.1278 sec/batch\n", "Epoch: 5/20... Training Step: 1892... Training loss: 1.2366... 0.1285 sec/batch\n", "Epoch: 5/20... Training Step: 1893... Training loss: 1.3793... 0.1291 sec/batch\n", "Epoch: 5/20... Training Step: 1894... Training loss: 1.1703... 0.1350 sec/batch\n", "Epoch: 5/20... Training Step: 1895... Training loss: 1.2572... 0.1330 sec/batch\n", "Epoch: 5/20... Training Step: 1896... Training loss: 1.5438... 0.1270 sec/batch\n", "Epoch: 5/20... Training Step: 1897... Training loss: 1.2769... 0.1335 sec/batch\n", "Epoch: 5/20... Training Step: 1898... Training loss: 1.1648... 0.1309 sec/batch\n", "Epoch: 5/20... Training Step: 1899... Training loss: 1.4280... 0.1296 sec/batch\n", "Epoch: 5/20... Training Step: 1900... Training loss: 1.1000... 0.1308 sec/batch\n", "Epoch: 5/20... Training Step: 1901... Training loss: 1.2958... 0.1389 sec/batch\n", "Epoch: 5/20... Training Step: 1902... Training loss: 1.2596... 0.1345 sec/batch\n", "Epoch: 5/20... Training Step: 1903... Training loss: 1.2586... 0.1307 sec/batch\n", "Epoch: 5/20... Training Step: 1904... Training loss: 1.2538... 0.1231 sec/batch\n", "Epoch: 5/20... Training Step: 1905... Training loss: 1.2464... 0.1273 sec/batch\n", "Epoch: 5/20... Training Step: 1906... Training loss: 1.3838... 
0.1286 sec/batch\n", "Epoch: 5/20... Training Step: 1907... Training loss: 1.2342... 0.1372 sec/batch\n", "Epoch: 5/20... Training Step: 1908... Training loss: 1.3099... 0.1282 sec/batch\n", "Epoch: 5/20... Training Step: 1909... Training loss: 1.4159... 0.1301 sec/batch\n", "Epoch: 5/20... Training Step: 1910... Training loss: 1.4648... 0.1310 sec/batch\n", "Epoch: 5/20... Training Step: 1911... Training loss: 1.1761... 0.1243 sec/batch\n", "Epoch: 5/20... Training Step: 1912... Training loss: 1.2998... 0.1310 sec/batch\n", "Epoch: 5/20... Training Step: 1913... Training loss: 1.4938... 0.1315 sec/batch\n", "Epoch: 5/20... Training Step: 1914... Training loss: 1.3429... 0.1309 sec/batch\n", "Epoch: 5/20... Training Step: 1915... Training loss: 1.1736... 0.1322 sec/batch\n", "Epoch: 5/20... Training Step: 1916... Training loss: 1.1663... 0.1367 sec/batch\n", "Epoch: 5/20... Training Step: 1917... Training loss: 1.3314... 0.1303 sec/batch\n", "Epoch: 5/20... Training Step: 1918... Training loss: 1.4389... 0.1276 sec/batch\n", "Epoch: 5/20... Training Step: 1919... Training loss: 1.1892... 0.1304 sec/batch\n", "Epoch: 5/20... Training Step: 1920... Training loss: 1.2446... 0.1375 sec/batch\n", "Epoch: 5/20... Training Step: 1921... Training loss: 1.2025... 0.1246 sec/batch\n", "Epoch: 5/20... Training Step: 1922... Training loss: 1.4916... 0.1248 sec/batch\n", "Epoch: 5/20... Training Step: 1923... Training loss: 1.2647... 0.1320 sec/batch\n", "Epoch: 5/20... Training Step: 1924... Training loss: 1.3336... 0.1256 sec/batch\n", "Epoch: 5/20... Training Step: 1925... Training loss: 1.2618... 0.1258 sec/batch\n", "Epoch: 5/20... Training Step: 1926... Training loss: 1.2409... 0.1357 sec/batch\n", "Epoch: 5/20... Training Step: 1927... Training loss: 1.3869... 0.1303 sec/batch\n", "Epoch: 5/20... Training Step: 1928... Training loss: 1.3183... 0.1320 sec/batch\n", "Epoch: 5/20... Training Step: 1929... Training loss: 1.3104... 0.1383 sec/batch\n", "Epoch: 5/20... 
Training Step: 1930... Training loss: 1.1808... 0.1385 sec/batch\n", "Epoch: 5/20... Training Step: 1931... Training loss: 1.4363... 0.1305 sec/batch\n", "Epoch: 5/20... Training Step: 1932... Training loss: 1.2102... 0.1289 sec/batch\n", "Epoch: 5/20... Training Step: 1933... Training loss: 1.2449... 0.1369 sec/batch\n", "Epoch: 5/20... Training Step: 1934... Training loss: 1.2348... 0.1138 sec/batch\n", "Epoch: 5/20... Training Step: 1935... Training loss: 1.2913... 0.1205 sec/batch\n", "Epoch: 5/20... Training Step: 1936... Training loss: 1.2973... 0.1301 sec/batch\n", "Epoch: 5/20... Training Step: 1937... Training loss: 1.4525... 0.1316 sec/batch\n", "Epoch: 5/20... Training Step: 1938... Training loss: 1.3099... 0.1242 sec/batch\n", "Epoch: 5/20... Training Step: 1939... Training loss: 1.3122... 0.1203 sec/batch\n", "Epoch: 5/20... Training Step: 1940... Training loss: 1.4561... 0.1139 sec/batch\n", "Epoch: 5/20... Training Step: 1941... Training loss: 1.4068... 0.1303 sec/batch\n", "Epoch: 5/20... Training Step: 1942... Training loss: 1.3605... 0.1296 sec/batch\n", "Epoch: 5/20... Training Step: 1943... Training loss: 1.2415... 0.1241 sec/batch\n", "Epoch: 5/20... Training Step: 1944... Training loss: 1.4320... 0.1227 sec/batch\n", "Epoch: 5/20... Training Step: 1945... Training loss: 1.4569... 0.1209 sec/batch\n", "Epoch: 5/20... Training Step: 1946... Training loss: 1.2464... 0.1224 sec/batch\n", "Epoch: 5/20... Training Step: 1947... Training loss: 1.4470... 0.1210 sec/batch\n", "Epoch: 5/20... Training Step: 1948... Training loss: 1.4155... 0.1351 sec/batch\n", "Epoch: 5/20... Training Step: 1949... Training loss: 1.2021... 0.1289 sec/batch\n", "Epoch: 5/20... Training Step: 1950... Training loss: 1.4768... 0.1244 sec/batch\n", "Epoch: 5/20... Training Step: 1951... Training loss: 1.2854... 0.1321 sec/batch\n", "Epoch: 5/20... Training Step: 1952... Training loss: 1.3493... 0.1369 sec/batch\n", "Epoch: 5/20... Training Step: 1953... 
Training loss: 1.6357... 0.1289 sec/batch\n", "Epoch: 5/20... Training Step: 1954... Training loss: 1.4641... 0.1247 sec/batch\n", "Epoch: 5/20... Training Step: 1955... Training loss: 1.3350... 0.1318 sec/batch\n", "Epoch: 5/20... Training Step: 1956... Training loss: 1.2323... 0.1292 sec/batch\n", "Epoch: 5/20... Training Step: 1957... Training loss: 1.4544... 0.1296 sec/batch\n", "Epoch: 5/20... Training Step: 1958... Training loss: 1.6270... 0.1229 sec/batch\n", "Epoch: 5/20... Training Step: 1959... Training loss: 1.5493... 0.1312 sec/batch\n", "Epoch: 5/20... Training Step: 1960... Training loss: 1.3591... 0.1268 sec/batch\n", "Epoch: 5/20... Training Step: 1961... Training loss: 1.5031... 0.1207 sec/batch\n", "Epoch: 5/20... Training Step: 1962... Training loss: 1.6247... 0.1215 sec/batch\n", "Epoch: 5/20... Training Step: 1963... Training loss: 1.4599... 0.1295 sec/batch\n", "Epoch: 5/20... Training Step: 1964... Training loss: 1.5750... 0.1308 sec/batch\n", "Epoch: 5/20... Training Step: 1965... Training loss: 1.3838... 0.1406 sec/batch\n", "Epoch: 5/20... Training Step: 1966... Training loss: 1.2777... 0.1342 sec/batch\n", "Epoch: 5/20... Training Step: 1967... Training loss: 1.4589... 0.1398 sec/batch\n", "Epoch: 5/20... Training Step: 1968... Training loss: 1.4192... 0.1329 sec/batch\n", "Epoch: 5/20... Training Step: 1969... Training loss: 1.4203... 0.1309 sec/batch\n", "Epoch: 5/20... Training Step: 1970... Training loss: 1.5021... 0.1325 sec/batch\n", "Epoch: 5/20... Training Step: 1971... Training loss: 1.4147... 0.1296 sec/batch\n", "Epoch: 5/20... Training Step: 1972... Training loss: 1.3449... 0.1320 sec/batch\n", "Epoch: 5/20... Training Step: 1973... Training loss: 1.4838... 0.1299 sec/batch\n", "Epoch: 5/20... Training Step: 1974... Training loss: 1.3985... 0.1306 sec/batch\n", "Epoch: 5/20... Training Step: 1975... Training loss: 1.3387... 0.1373 sec/batch\n", "Epoch: 5/20... Training Step: 1976... Training loss: 1.1368... 
0.1269 sec/batch\n", "Epoch: 5/20... Training Step: 1977... Training loss: 1.3112... 0.1307 sec/batch\n", "Epoch: 5/20... Training Step: 1978... Training loss: 1.4505... 0.1320 sec/batch\n", "Epoch: 5/20... Training Step: 1979... Training loss: 1.3829... 0.1240 sec/batch\n", "Epoch: 5/20... Training Step: 1980... Training loss: 1.5161... 0.1268 sec/batch\n", "Epoch: 5/20... Training Step: 1981... Training loss: 1.3980... 0.1318 sec/batch\n", "Epoch: 5/20... Training Step: 1982... Training loss: 1.1892... 0.1297 sec/batch\n", "Epoch: 5/20... Training Step: 1983... Training loss: 1.4018... 0.1304 sec/batch\n", "Epoch: 5/20... Training Step: 1984... Training loss: 1.3985... 0.1267 sec/batch\n", "Epoch: 5/20... Training Step: 1985... Training loss: 1.5136... 0.1328 sec/batch\n", "Epoch: 5/20... Training Step: 1986... Training loss: 1.2822... 0.1307 sec/batch\n", "Epoch: 5/20... Training Step: 1987... Training loss: 1.6553... 0.1299 sec/batch\n", "Epoch: 5/20... Training Step: 1988... Training loss: 1.3044... 0.1330 sec/batch\n", "Epoch: 5/20... Training Step: 1989... Training loss: 1.3977... 0.1297 sec/batch\n", "Epoch: 5/20... Training Step: 1990... Training loss: 1.4979... 0.1301 sec/batch\n", "Epoch: 5/20... Training Step: 1991... Training loss: 1.2436... 0.1321 sec/batch\n", "Epoch: 5/20... Training Step: 1992... Training loss: 1.1109... 0.1300 sec/batch\n", "Epoch: 5/20... Training Step: 1993... Training loss: 1.1794... 0.1300 sec/batch\n", "Epoch: 5/20... Training Step: 1994... Training loss: 1.4049... 0.1309 sec/batch\n", "Epoch: 5/20... Training Step: 1995... Training loss: 1.2522... 0.1308 sec/batch\n", "Epoch: 5/20... Training Step: 1996... Training loss: 1.4161... 0.1297 sec/batch\n", "Epoch: 5/20... Training Step: 1997... Training loss: 1.2564... 0.1243 sec/batch\n", "Epoch: 5/20... Training Step: 1998... Training loss: 1.2638... 0.1195 sec/batch\n", "Epoch: 5/20... Training Step: 1999... Training loss: 1.1311... 0.1188 sec/batch\n", "Epoch: 5/20... 
Training Step: 2000... Training loss: 1.4742... 0.1220 sec/batch\n", "Epoch: 5/20... Training Step: 2001... Training loss: 1.3373... 0.1201 sec/batch\n", "Epoch: 5/20... Training Step: 2002... Training loss: 1.2330... 0.1250 sec/batch\n", "Epoch: 5/20... Training Step: 2003... Training loss: 1.3269... 0.1252 sec/batch\n", "Epoch: 5/20... Training Step: 2004... Training loss: 1.3388... 0.1229 sec/batch\n", "Epoch: 5/20... Training Step: 2005... Training loss: 1.3626... 0.1262 sec/batch\n", "Epoch: 5/20... Training Step: 2006... Training loss: 1.5422... 0.1200 sec/batch\n", "Epoch: 5/20... Training Step: 2007... Training loss: 1.4364... 0.1206 sec/batch\n", "Epoch: 5/20... Training Step: 2008... Training loss: 1.4181... 0.1219 sec/batch\n", "Epoch: 5/20... Training Step: 2009... Training loss: 1.5090... 0.1242 sec/batch\n", "Epoch: 5/20... Training Step: 2010... Training loss: 1.3017... 0.1242 sec/batch\n", "Epoch: 5/20... Training Step: 2011... Training loss: 1.4210... 0.1232 sec/batch\n", "Epoch: 5/20... Training Step: 2012... Training loss: 1.2559... 0.1219 sec/batch\n", "Epoch: 5/20... Training Step: 2013... Training loss: 1.2238... 0.1236 sec/batch\n", "Epoch: 5/20... Training Step: 2014... Training loss: 1.2536... 0.1266 sec/batch\n", "Epoch: 5/20... Training Step: 2015... Training loss: 1.1755... 0.1243 sec/batch\n", "Epoch: 5/20... Training Step: 2016... Training loss: 1.2535... 0.1226 sec/batch\n", "Epoch: 5/20... Training Step: 2017... Training loss: 1.4850... 0.1190 sec/batch\n", "Epoch: 5/20... Training Step: 2018... Training loss: 1.3355... 0.1211 sec/batch\n", "Epoch: 5/20... Training Step: 2019... Training loss: 1.4649... 0.1212 sec/batch\n", "Epoch: 5/20... Training Step: 2020... Training loss: 1.2030... 0.1236 sec/batch\n", "Epoch: 5/20... Training Step: 2021... Training loss: 1.3589... 0.1196 sec/batch\n", "Epoch: 5/20... Training Step: 2022... Training loss: 1.2917... 0.1188 sec/batch\n", "Epoch: 5/20... Training Step: 2023... 
Training loss: 1.2187... 0.1251 sec/batch\n", "Epoch: 5/20... Training Step: 2024... Training loss: 1.5688... 0.1228 sec/batch\n", "Epoch: 5/20... Training Step: 2025... Training loss: 1.3659... 0.1200 sec/batch\n", "Epoch: 5/20... Training Step: 2026... Training loss: 1.4193... 0.1325 sec/batch\n", "Epoch: 5/20... Training Step: 2027... Training loss: 1.3733... 0.1341 sec/batch\n", "Epoch: 5/20... Training Step: 2028... Training loss: 1.4903... 0.1251 sec/batch\n", "Epoch: 5/20... Training Step: 2029... Training loss: 1.3682... 0.1219 sec/batch\n", "Epoch: 5/20... Training Step: 2030... Training loss: 1.4203... 0.1237 sec/batch\n", "Epoch: 5/20... Training Step: 2031... Training loss: 1.4826... 0.1158 sec/batch\n", "Epoch: 5/20... Training Step: 2032... Training loss: 1.1715... 0.1249 sec/batch\n", "Epoch: 5/20... Training Step: 2033... Training loss: 1.1239... 0.1208 sec/batch\n", "Epoch: 5/20... Training Step: 2034... Training loss: 1.4076... 0.1162 sec/batch\n", "Epoch: 5/20... Training Step: 2035... Training loss: 1.2335... 0.1242 sec/batch\n", "Epoch: 5/20... Training Step: 2036... Training loss: 1.4004... 0.1203 sec/batch\n", "Epoch: 5/20... Training Step: 2037... Training loss: 1.2360... 0.1220 sec/batch\n", "Epoch: 5/20... Training Step: 2038... Training loss: 1.4961... 0.1233 sec/batch\n", "Epoch: 5/20... Training Step: 2039... Training loss: 1.4774... 0.1221 sec/batch\n", "Epoch: 5/20... Training Step: 2040... Training loss: 1.3138... 0.1237 sec/batch\n", "Epoch: 5/20... Training Step: 2041... Training loss: 1.4879... 0.1273 sec/batch\n", "Epoch: 5/20... Training Step: 2042... Training loss: 1.4395... 0.1312 sec/batch\n", "Epoch: 5/20... Training Step: 2043... Training loss: 1.4202... 0.1251 sec/batch\n", "Epoch: 5/20... Training Step: 2044... Training loss: 1.1512... 0.1229 sec/batch\n", "Epoch: 5/20... Training Step: 2045... Training loss: 1.4685... 0.1202 sec/batch\n", "Epoch: 5/20... Training Step: 2046... Training loss: 1.2811... 
0.1248 sec/batch\n", "Epoch: 5/20... Training Step: 2047... Training loss: 1.2285... 0.1291 sec/batch\n", "Epoch: 5/20... Training Step: 2048... Training loss: 1.5609... 0.1284 sec/batch\n", "Epoch: 5/20... Training Step: 2049... Training loss: 1.4412... 0.1253 sec/batch\n", "Epoch: 5/20... Training Step: 2050... Training loss: 1.2376... 0.1265 sec/batch\n", "Epoch: 5/20... Training Step: 2051... Training loss: 1.4113... 0.1338 sec/batch\n", "Epoch: 5/20... Training Step: 2052... Training loss: 1.3083... 0.1321 sec/batch\n", "Epoch: 5/20... Training Step: 2053... Training loss: 1.2489... 0.1240 sec/batch\n", "Epoch: 5/20... Training Step: 2054... Training loss: 1.4137... 0.1216 sec/batch\n", "Epoch: 5/20... Training Step: 2055... Training loss: 1.0055... 0.1286 sec/batch\n", "Epoch: 5/20... Training Step: 2056... Training loss: 1.2158... 0.1273 sec/batch\n", "Epoch: 5/20... Training Step: 2057... Training loss: 1.2625... 0.1221 sec/batch\n", "Epoch: 5/20... Training Step: 2058... Training loss: 1.2687... 0.1235 sec/batch\n", "Epoch: 5/20... Training Step: 2059... Training loss: 1.3201... 0.1393 sec/batch\n", "Epoch: 5/20... Training Step: 2060... Training loss: 1.5002... 0.1273 sec/batch\n", "Epoch: 5/20... Training Step: 2061... Training loss: 1.2850... 0.1207 sec/batch\n", "Epoch: 5/20... Training Step: 2062... Training loss: 1.1946... 0.1206 sec/batch\n", "Epoch: 5/20... Training Step: 2063... Training loss: 1.2273... 0.1231 sec/batch\n", "Epoch: 5/20... Training Step: 2064... Training loss: 1.3940... 0.1257 sec/batch\n", "Epoch: 5/20... Training Step: 2065... Training loss: 1.3493... 0.1207 sec/batch\n", "Epoch: 5/20... Training Step: 2066... Training loss: 1.1373... 0.1229 sec/batch\n", "Epoch: 5/20... Training Step: 2067... Training loss: 1.1226... 0.1202 sec/batch\n", "Epoch: 5/20... Training Step: 2068... Training loss: 1.4506... 0.1214 sec/batch\n", "Epoch: 5/20... Training Step: 2069... Training loss: 1.5134... 0.1205 sec/batch\n", "Epoch: 5/20... 
Training Step: 2070... Training loss: 1.3288... 0.1207 sec/batch\n", "Epoch: 5/20... Training Step: 2071... Training loss: 1.4938... 0.1267 sec/batch\n", "Epoch: 5/20... Training Step: 2072... Training loss: 1.2917... 0.1212 sec/batch\n", "Epoch: 5/20... Training Step: 2073... Training loss: 1.3838... 0.1245 sec/batch\n", "Epoch: 5/20... Training Step: 2074... Training loss: 1.2705... 0.1221 sec/batch\n", "Epoch: 5/20... Training Step: 2075... Training loss: 1.5055... 0.1240 sec/batch\n", "Epoch: 5/20... Training Step: 2076... Training loss: 1.2751... 0.1212 sec/batch\n", "Epoch: 5/20... Training Step: 2077... Training loss: 1.2524... 0.1229 sec/batch\n", "Epoch: 5/20... Training Step: 2078... Training loss: 1.4998... 0.1214 sec/batch\n", "Epoch: 5/20... Training Step: 2079... Training loss: 1.5108... 0.1255 sec/batch\n", "Epoch: 5/20... Training Step: 2080... Training loss: 1.5976... 0.1239 sec/batch\n", "Epoch: 5/20... Training Step: 2081... Training loss: 1.4327... 0.1278 sec/batch\n", "Epoch: 5/20... Training Step: 2082... Training loss: 1.5730... 0.1260 sec/batch\n", "Epoch: 5/20... Training Step: 2083... Training loss: 1.5314... 0.1323 sec/batch\n", "Epoch: 5/20... Training Step: 2084... Training loss: 1.1941... 0.1263 sec/batch\n", "Epoch: 5/20... Training Step: 2085... Training loss: 1.3786... 0.1315 sec/batch\n", "Epoch: 5/20... Training Step: 2086... Training loss: 1.3234... 0.1336 sec/batch\n", "Epoch: 5/20... Training Step: 2087... Training loss: 1.3384... 0.1253 sec/batch\n", "Epoch: 5/20... Training Step: 2088... Training loss: 1.2511... 0.1285 sec/batch\n", "Epoch: 5/20... Training Step: 2089... Training loss: 1.6410... 0.1235 sec/batch\n", "Epoch: 5/20... Training Step: 2090... Training loss: 1.3386... 0.1193 sec/batch\n", "Epoch: 5/20... Training Step: 2091... Training loss: 1.5113... 0.1273 sec/batch\n", "Epoch: 5/20... Training Step: 2092... Training loss: 1.3396... 0.1211 sec/batch\n", "Epoch: 5/20... Training Step: 2093... 
Training loss: 1.6290... 0.1292 sec/batch\n", "Epoch: 5/20... Training Step: 2094... Training loss: 1.2716... 0.1308 sec/batch\n", "Epoch: 5/20... Training Step: 2095... Training loss: 1.3236... 0.1286 sec/batch\n", "Epoch: 5/20... Training Step: 2096... Training loss: 1.4320... 0.1312 sec/batch\n", "Epoch: 5/20... Training Step: 2097... Training loss: 1.3221... 0.1297 sec/batch\n", "Epoch: 5/20... Training Step: 2098... Training loss: 1.2143... 0.1261 sec/batch\n", "Epoch: 5/20... Training Step: 2099... Training loss: 1.4509... 0.1210 sec/batch\n", "Epoch: 5/20... Training Step: 2100... Training loss: 1.4199... 0.1252 sec/batch\n", "Epoch: 5/20... Training Step: 2101... Training loss: 1.3452... 0.1240 sec/batch\n", "Epoch: 5/20... Training Step: 2102... Training loss: 1.1982... 0.1189 sec/batch\n", "Epoch: 5/20... Training Step: 2103... Training loss: 1.3105... 0.1236 sec/batch\n", "Epoch: 5/20... Training Step: 2104... Training loss: 1.4242... 0.1205 sec/batch\n", "Epoch: 5/20... Training Step: 2105... Training loss: 1.3309... 0.1208 sec/batch\n", "Epoch: 5/20... Training Step: 2106... Training loss: 1.2867... 0.1221 sec/batch\n", "Epoch: 5/20... Training Step: 2107... Training loss: 1.5059... 0.1287 sec/batch\n", "Epoch: 5/20... Training Step: 2108... Training loss: 1.2699... 0.1256 sec/batch\n", "Epoch: 5/20... Training Step: 2109... Training loss: 1.2599... 0.1243 sec/batch\n", "Epoch: 5/20... Training Step: 2110... Training loss: 1.3264... 0.1209 sec/batch\n", "Epoch: 5/20... Training Step: 2111... Training loss: 1.3123... 0.1191 sec/batch\n", "Epoch: 5/20... Training Step: 2112... Training loss: 1.4189... 0.1174 sec/batch\n", "Epoch: 5/20... Training Step: 2113... Training loss: 1.4893... 0.1279 sec/batch\n", "Epoch: 5/20... Training Step: 2114... Training loss: 1.1976... 0.1254 sec/batch\n", "Epoch: 5/20... Training Step: 2115... Training loss: 1.1844... 0.1213 sec/batch\n", "Epoch: 5/20... Training Step: 2116... Training loss: 1.2370... 
0.1286 sec/batch\n", "Epoch: 5/20... Training Step: 2117... Training loss: 1.2447... 0.1308 sec/batch\n", "Epoch: 5/20... Training Step: 2118... Training loss: 1.4092... 0.1261 sec/batch\n", "Epoch: 5/20... Training Step: 2119... Training loss: 1.3564... 0.1290 sec/batch\n", "Epoch: 5/20... Training Step: 2120... Training loss: 1.4175... 0.1282 sec/batch\n", "Epoch: 5/20... Training Step: 2121... Training loss: 1.5402... 0.1307 sec/batch\n", "Epoch: 5/20... Training Step: 2122... Training loss: 1.3617... 0.1244 sec/batch\n", "Epoch: 5/20... Training Step: 2123... Training loss: 1.6011... 0.1165 sec/batch\n", "Epoch: 5/20... Training Step: 2124... Training loss: 1.4850... 0.1316 sec/batch\n", "Epoch: 5/20... Training Step: 2125... Training loss: 1.3993... 0.1218 sec/batch\n", "Epoch: 5/20... Training Step: 2126... Training loss: 1.4731... 0.1224 sec/batch\n", "Epoch: 5/20... Training Step: 2127... Training loss: 1.4577... 0.1325 sec/batch\n", "Epoch: 5/20... Training Step: 2128... Training loss: 1.4428... 0.1301 sec/batch\n", "Epoch: 5/20... Training Step: 2129... Training loss: 1.5331... 0.1209 sec/batch\n", "Epoch: 5/20... Training Step: 2130... Training loss: 1.3790... 0.1200 sec/batch\n", "Epoch: 5/20... Training Step: 2131... Training loss: 1.4644... 0.1229 sec/batch\n", "Epoch: 5/20... Training Step: 2132... Training loss: 1.3375... 0.1204 sec/batch\n", "Epoch: 5/20... Training Step: 2133... Training loss: 1.4570... 0.1232 sec/batch\n", "Epoch: 5/20... Training Step: 2134... Training loss: 1.6120... 0.1202 sec/batch\n", "Epoch: 5/20... Training Step: 2135... Training loss: 1.3700... 0.1219 sec/batch\n", "Epoch: 5/20... Training Step: 2136... Training loss: 1.3754... 0.1206 sec/batch\n", "Epoch: 5/20... Training Step: 2137... Training loss: 1.2274... 0.1173 sec/batch\n", "Epoch: 5/20... Training Step: 2138... Training loss: 1.2678... 0.1255 sec/batch\n", "Epoch: 5/20... Training Step: 2139... Training loss: 1.2651... 0.1268 sec/batch\n", "Epoch: 5/20... 
Training Step: 2140... Training loss: 1.4352... 0.1201 sec/batch\n", "Epoch: 5/20... Training Step: 2141... Training loss: 1.1677... 0.1194 sec/batch\n", "Epoch: 5/20... Training Step: 2142... Training loss: 1.3226... 0.1230 sec/batch\n", "Epoch: 5/20... Training Step: 2143... Training loss: 1.3909... 0.1217 sec/batch\n", "Epoch: 5/20... Training Step: 2144... Training loss: 1.4291... 0.1218 sec/batch\n", "Epoch: 5/20... Training Step: 2145... Training loss: 1.4847... 0.1200 sec/batch\n", "Epoch: 5/20... Training Step: 2146... Training loss: 1.4530... 0.1212 sec/batch\n", "Epoch: 5/20... Training Step: 2147... Training loss: 1.4686... 0.1188 sec/batch\n", "Epoch: 5/20... Training Step: 2148... Training loss: 1.2717... 0.1255 sec/batch\n", "Epoch: 5/20... Training Step: 2149... Training loss: 1.2172... 0.1174 sec/batch\n", "Epoch: 5/20... Training Step: 2150... Training loss: 1.3201... 0.1261 sec/batch\n", "Epoch: 5/20... Training Step: 2151... Training loss: 1.3613... 0.1223 sec/batch\n", "Epoch: 5/20... Training Step: 2152... Training loss: 1.5339... 0.1200 sec/batch\n", "Epoch: 5/20... Training Step: 2153... Training loss: 1.2476... 0.1256 sec/batch\n", "Epoch: 5/20... Training Step: 2154... Training loss: 1.3315... 0.1247 sec/batch\n", "Epoch: 5/20... Training Step: 2155... Training loss: 1.3830... 0.1207 sec/batch\n", "Epoch: 5/20... Training Step: 2156... Training loss: 1.2626... 0.1240 sec/batch\n", "Epoch: 5/20... Training Step: 2157... Training loss: 1.4222... 0.1264 sec/batch\n", "Epoch: 5/20... Training Step: 2158... Training loss: 1.3713... 0.1262 sec/batch\n", "Epoch: 5/20... Training Step: 2159... Training loss: 1.1779... 0.1219 sec/batch\n", "Epoch: 5/20... Training Step: 2160... Training loss: 1.5389... 0.1224 sec/batch\n", "Epoch: 5/20... Training Step: 2161... Training loss: 1.1701... 0.1180 sec/batch\n", "Epoch: 5/20... Training Step: 2162... Training loss: 1.4128... 0.1177 sec/batch\n", "Epoch: 5/20... Training Step: 2163... 
Training loss: 1.3217... 0.1235 sec/batch\n", "Epoch: 5/20... Training Step: 2164... Training loss: 1.7374... 0.1249 sec/batch\n", "Epoch: 5/20... Training Step: 2165... Training loss: 1.3241... 0.1240 sec/batch\n", "Epoch: 5/20... Training Step: 2166... Training loss: 1.4510... 0.1205 sec/batch\n", "Epoch: 5/20... Training Step: 2167... Training loss: 1.3563... 0.1223 sec/batch\n", "Epoch: 5/20... Training Step: 2168... Training loss: 1.1241... 0.1207 sec/batch\n", "Epoch: 5/20... Training Step: 2169... Training loss: 1.3653... 0.1251 sec/batch\n", "Epoch: 5/20... Training Step: 2170... Training loss: 1.3491... 0.1238 sec/batch\n", "Epoch: 5/20... Training Step: 2171... Training loss: 1.0264... 0.1245 sec/batch\n", "Epoch: 5/20... Training Step: 2172... Training loss: 1.2446... 0.1219 sec/batch\n", "Epoch: 5/20... Training Step: 2173... Training loss: 1.3225... 0.1177 sec/batch\n", "Epoch: 5/20... Training Step: 2174... Training loss: 1.2322... 0.1171 sec/batch\n", "Epoch: 5/20... Training Step: 2175... Training loss: 1.3576... 0.1215 sec/batch\n", "Epoch: 5/20... Training Step: 2176... Training loss: 1.2236... 0.1224 sec/batch\n", "Epoch: 5/20... Training Step: 2177... Training loss: 1.1469... 0.1203 sec/batch\n", "Epoch: 5/20... Training Step: 2178... Training loss: 1.4571... 0.1256 sec/batch\n", "Epoch: 5/20... Training Step: 2179... Training loss: 1.2698... 0.1237 sec/batch\n", "Epoch: 5/20... Training Step: 2180... Training loss: 1.1421... 0.1253 sec/batch\n", "Epoch: 5/20... Training Step: 2181... Training loss: 1.1433... 0.1213 sec/batch\n", "Epoch: 5/20... Training Step: 2182... Training loss: 1.2757... 0.1231 sec/batch\n", "Epoch: 5/20... Training Step: 2183... Training loss: 1.3471... 0.1218 sec/batch\n", "Epoch: 5/20... Training Step: 2184... Training loss: 1.3731... 0.1208 sec/batch\n", "Epoch: 5/20... Training Step: 2185... Training loss: 1.3316... 0.1237 sec/batch\n", "Epoch: 5/20... Training Step: 2186... Training loss: 1.1254... 
0.1246 sec/batch\n", "Epoch: 5/20... Training Step: 2187... Training loss: 1.4053... 0.1191 sec/batch\n", "Epoch: 5/20... Training Step: 2188... Training loss: 1.3396... 0.1237 sec/batch\n", "Epoch: 5/20... Training Step: 2189... Training loss: 1.2039... 0.1249 sec/batch\n", "Epoch: 5/20... Training Step: 2190... Training loss: 1.3069... 0.1183 sec/batch\n", "Epoch: 5/20... Training Step: 2191... Training loss: 1.4495... 0.1211 sec/batch\n", "Epoch: 5/20... Training Step: 2192... Training loss: 1.1505... 0.1235 sec/batch\n", "Epoch: 5/20... Training Step: 2193... Training loss: 1.3098... 0.1222 sec/batch\n", "Epoch: 5/20... Training Step: 2194... Training loss: 1.1587... 0.1242 sec/batch\n", "Epoch: 5/20... Training Step: 2195... Training loss: 1.4081... 0.1266 sec/batch\n", "Epoch: 5/20... Training Step: 2196... Training loss: 1.2032... 0.1248 sec/batch\n", "Epoch: 5/20... Training Step: 2197... Training loss: 1.1867... 0.1177 sec/batch\n", "Epoch: 5/20... Training Step: 2198... Training loss: 1.4075... 0.1234 sec/batch\n", "Epoch: 5/20... Training Step: 2199... Training loss: 1.0782... 0.1249 sec/batch\n", "Epoch: 5/20... Training Step: 2200... Training loss: 1.4022... 0.1191 sec/batch\n", "Epoch: 5/20... Training Step: 2201... Training loss: 1.2112... 0.1221 sec/batch\n", "Epoch: 5/20... Training Step: 2202... Training loss: 1.0665... 0.1220 sec/batch\n", "Epoch: 5/20... Training Step: 2203... Training loss: 1.1189... 0.1227 sec/batch\n", "Epoch: 5/20... Training Step: 2204... Training loss: 1.4799... 0.1209 sec/batch\n", "Epoch: 5/20... Training Step: 2205... Training loss: 1.2105... 0.1260 sec/batch\n", "Epoch: 5/20... Training Step: 2206... Training loss: 1.3033... 0.1197 sec/batch\n", "Epoch: 5/20... Training Step: 2207... Training loss: 1.2723... 0.1227 sec/batch\n", "Epoch: 5/20... Training Step: 2208... Training loss: 1.1299... 0.1211 sec/batch\n", "Epoch: 5/20... Training Step: 2209... Training loss: 1.0649... 0.1209 sec/batch\n", "Epoch: 5/20... 
Training Step: 2210... Training loss: 1.0279... 0.1237 sec/batch\n", "Epoch: 5/20... Training Step: 2211... Training loss: 1.3164... 0.1186 sec/batch\n", "Epoch: 5/20... Training Step: 2212... Training loss: 1.2554... 0.1235 sec/batch\n", "Epoch: 5/20... Training Step: 2213... Training loss: 1.2331... 0.1203 sec/batch\n", "Epoch: 5/20... Training Step: 2214... Training loss: 1.3963... 0.1192 sec/batch\n", "Epoch: 5/20... Training Step: 2215... Training loss: 1.4784... 0.1168 sec/batch\n", "Epoch: 5/20... Training Step: 2216... Training loss: 1.1013... 0.1231 sec/batch\n", "Epoch: 5/20... Training Step: 2217... Training loss: 1.4188... 0.1231 sec/batch\n", "Epoch: 5/20... Training Step: 2218... Training loss: 1.3214... 0.1239 sec/batch\n", "Epoch: 5/20... Training Step: 2219... Training loss: 1.3651... 0.1288 sec/batch\n", "Epoch: 5/20... Training Step: 2220... Training loss: 1.2781... 0.1258 sec/batch\n", "Epoch: 5/20... Training Step: 2221... Training loss: 1.2946... 0.1267 sec/batch\n", "Epoch: 5/20... Training Step: 2222... Training loss: 1.3607... 0.1199 sec/batch\n", "Epoch: 5/20... Training Step: 2223... Training loss: 1.2041... 0.1219 sec/batch\n", "Epoch: 5/20... Training Step: 2224... Training loss: 1.4460... 0.1196 sec/batch\n", "Epoch: 5/20... Training Step: 2225... Training loss: 1.4566... 0.1222 sec/batch\n", "Epoch: 5/20... Training Step: 2226... Training loss: 1.2434... 0.1268 sec/batch\n", "Epoch: 5/20... Training Step: 2227... Training loss: 1.1295... 0.1265 sec/batch\n", "Epoch: 5/20... Training Step: 2228... Training loss: 1.3996... 0.1202 sec/batch\n", "Epoch: 5/20... Training Step: 2229... Training loss: 1.2414... 0.1186 sec/batch\n", "Epoch: 5/20... Training Step: 2230... Training loss: 1.4211... 0.1176 sec/batch\n", "Epoch: 5/20... Training Step: 2231... Training loss: 1.2947... 0.1223 sec/batch\n", "Epoch: 5/20... Training Step: 2232... Training loss: 1.2941... 0.1208 sec/batch\n", "Epoch: 5/20... Training Step: 2233... 
Training loss: 1.4468... 0.1201 sec/batch\n", "Epoch: 5/20... Training Step: 2234... Training loss: 1.4240... 0.1227 sec/batch\n", "Epoch: 5/20... Training Step: 2235... Training loss: 1.2270... 0.1216 sec/batch\n", "Epoch: 5/20... Training Step: 2236... Training loss: 1.4344... 0.1219 sec/batch\n", "Epoch: 5/20... Training Step: 2237... Training loss: 1.1316... 0.1211 sec/batch\n", "Epoch: 5/20... Training Step: 2238... Training loss: 1.1930... 0.1236 sec/batch\n", "Epoch: 5/20... Training Step: 2239... Training loss: 1.2547... 0.1173 sec/batch\n", "Epoch: 5/20... Training Step: 2240... Training loss: 1.3189... 0.1208 sec/batch\n", "Epoch: 5/20... Training Step: 2241... Training loss: 1.1847... 0.1217 sec/batch\n", "Epoch: 5/20... Training Step: 2242... Training loss: 1.2743... 0.1204 sec/batch\n", "Epoch: 5/20... Training Step: 2243... Training loss: 1.3364... 0.1240 sec/batch\n", "Epoch: 5/20... Training Step: 2244... Training loss: 1.1097... 0.1209 sec/batch\n", "Epoch: 5/20... Training Step: 2245... Training loss: 1.2344... 0.1229 sec/batch\n", "Epoch: 5/20... Training Step: 2246... Training loss: 1.3282... 0.1238 sec/batch\n", "Epoch: 5/20... Training Step: 2247... Training loss: 1.1812... 0.1285 sec/batch\n", "Epoch: 5/20... Training Step: 2248... Training loss: 1.2016... 0.1242 sec/batch\n", "Epoch: 5/20... Training Step: 2249... Training loss: 1.2162... 0.1287 sec/batch\n", "Epoch: 5/20... Training Step: 2250... Training loss: 1.2379... 0.1281 sec/batch\n", "Epoch: 5/20... Training Step: 2251... Training loss: 1.1340... 0.1266 sec/batch\n", "Epoch: 5/20... Training Step: 2252... Training loss: 1.4828... 0.1338 sec/batch\n", "Epoch: 5/20... Training Step: 2253... Training loss: 1.2306... 0.1288 sec/batch\n", "Epoch: 5/20... Training Step: 2254... Training loss: 1.1628... 0.1235 sec/batch\n", "Epoch: 5/20... Training Step: 2255... Training loss: 1.2777... 0.1235 sec/batch\n", "Epoch: 5/20... Training Step: 2256... Training loss: 1.2684... 
0.1269 sec/batch\n", "Epoch: 5/20... Training Step: 2257... Training loss: 1.2504... 0.1259 sec/batch\n", "Epoch: 5/20... Training Step: 2258... Training loss: 1.2345... 0.1280 sec/batch\n", "Epoch: 5/20... Training Step: 2259... Training loss: 1.1592... 0.1308 sec/batch\n", "Epoch: 5/20... Training Step: 2260... Training loss: 1.3156... 0.1366 sec/batch\n", "Epoch: 5/20... Training Step: 2261... Training loss: 1.2982... 0.1433 sec/batch\n", "Epoch: 5/20... Training Step: 2262... Training loss: 1.5661... 0.1304 sec/batch\n", "Epoch: 5/20... Training Step: 2263... Training loss: 1.2208... 0.1253 sec/batch\n", "Epoch: 5/20... Training Step: 2264... Training loss: 1.4968... 0.1316 sec/batch\n", "Epoch: 5/20... Training Step: 2265... Training loss: 1.4126... 0.1250 sec/batch\n", "Epoch: 5/20... Training Step: 2266... Training loss: 1.2998... 0.1282 sec/batch\n", "Epoch: 5/20... Training Step: 2267... Training loss: 1.1956... 0.1308 sec/batch\n", "Epoch: 5/20... Training Step: 2268... Training loss: 1.4148... 0.1302 sec/batch\n", "Epoch: 5/20... Training Step: 2269... Training loss: 1.3953... 0.1282 sec/batch\n", "Epoch: 5/20... Training Step: 2270... Training loss: 1.3235... 0.1295 sec/batch\n", "Epoch: 5/20... Training Step: 2271... Training loss: 1.5398... 0.1322 sec/batch\n", "Epoch: 5/20... Training Step: 2272... Training loss: 1.4907... 0.1231 sec/batch\n", "Epoch: 5/20... Training Step: 2273... Training loss: 1.4090... 0.1257 sec/batch\n", "Epoch: 5/20... Training Step: 2274... Training loss: 1.1847... 0.1181 sec/batch\n", "Epoch: 5/20... Training Step: 2275... Training loss: 1.3121... 0.1379 sec/batch\n", "Epoch: 5/20... Training Step: 2276... Training loss: 1.0537... 0.1335 sec/batch\n", "Epoch: 5/20... Training Step: 2277... Training loss: 1.3853... 0.1488 sec/batch\n", "Epoch: 5/20... Training Step: 2278... Training loss: 1.3854... 0.1264 sec/batch\n", "Epoch: 5/20... Training Step: 2279... Training loss: 1.4398... 0.1309 sec/batch\n", "Epoch: 5/20... 
Training Step: 2280... Training loss: 1.5588... 0.1327 sec/batch\n", "Epoch: 5/20... Training Step: 2281... Training loss: 1.2669... 0.1276 sec/batch\n", "Epoch: 5/20... Training Step: 2282... Training loss: 1.3803... 0.1310 sec/batch\n", "Epoch: 5/20... Training Step: 2283... Training loss: 1.4367... 0.1296 sec/batch\n", "Epoch: 5/20... Training Step: 2284... Training loss: 1.4695... 0.1346 sec/batch\n", "Epoch: 5/20... Training Step: 2285... Training loss: 1.1804... 0.1366 sec/batch\n", "Epoch: 5/20... Training Step: 2286... Training loss: 1.3090... 0.1370 sec/batch\n", "Epoch: 5/20... Training Step: 2287... Training loss: 1.4822... 0.1354 sec/batch\n", "Epoch: 5/20... Training Step: 2288... Training loss: 1.3859... 0.1343 sec/batch\n", "Epoch: 5/20... Training Step: 2289... Training loss: 1.5533... 0.1243 sec/batch\n", "Epoch: 5/20... Training Step: 2290... Training loss: 1.4981... 0.1305 sec/batch\n", "Epoch: 5/20... Training Step: 2291... Training loss: 1.3120... 0.1345 sec/batch\n", "Epoch: 5/20... Training Step: 2292... Training loss: 1.3983... 0.1309 sec/batch\n", "Epoch: 5/20... Training Step: 2293... Training loss: 1.1974... 0.1382 sec/batch\n", "Epoch: 5/20... Training Step: 2294... Training loss: 1.5368... 0.1251 sec/batch\n", "Epoch: 5/20... Training Step: 2295... Training loss: 1.5380... 0.1457 sec/batch\n", "Epoch: 5/20... Training Step: 2296... Training loss: 1.6047... 0.1546 sec/batch\n", "Epoch: 5/20... Training Step: 2297... Training loss: 1.3679... 0.1530 sec/batch\n", "Epoch: 5/20... Training Step: 2298... Training loss: 1.2772... 0.1483 sec/batch\n", "Epoch: 5/20... Training Step: 2299... Training loss: 1.4528... 0.1311 sec/batch\n", "Epoch: 5/20... Training Step: 2300... Training loss: 1.2750... 0.1419 sec/batch\n", "Epoch: 5/20... Training Step: 2301... Training loss: 1.2816... 0.1385 sec/batch\n", "Epoch: 5/20... Training Step: 2302... Training loss: 1.3153... 0.1384 sec/batch\n", "Epoch: 5/20... Training Step: 2303... 
Training loss: 1.3220... 0.1355 sec/batch\n", "Epoch: 5/20... Training Step: 2304... Training loss: 1.2907... 0.1351 sec/batch\n", "Epoch: 5/20... Training Step: 2305... Training loss: 1.3137... 0.1494 sec/batch\n", "Epoch: 5/20... Training Step: 2306... Training loss: 1.3615... 0.1339 sec/batch\n", "Epoch: 5/20... Training Step: 2307... Training loss: 1.2708... 0.1317 sec/batch\n", "Epoch: 5/20... Training Step: 2308... Training loss: 1.3530... 0.1351 sec/batch\n", "Epoch: 5/20... Training Step: 2309... Training loss: 1.2334... 0.1302 sec/batch\n", "Epoch: 5/20... Training Step: 2310... Training loss: 1.6088... 0.1386 sec/batch\n", "Epoch: 5/20... Training Step: 2311... Training loss: 1.3722... 0.1317 sec/batch\n", "Epoch: 5/20... Training Step: 2312... Training loss: 1.1236... 0.1342 sec/batch\n", "Epoch: 5/20... Training Step: 2313... Training loss: 1.2260... 0.1341 sec/batch\n", "Epoch: 5/20... Training Step: 2314... Training loss: 1.1246... 0.1322 sec/batch\n", "Epoch: 5/20... Training Step: 2315... Training loss: 1.1872... 0.1337 sec/batch\n", "Epoch: 5/20... Training Step: 2316... Training loss: 1.2634... 0.1294 sec/batch\n", "Epoch: 5/20... Training Step: 2317... Training loss: 1.4518... 0.1356 sec/batch\n", "Epoch: 5/20... Training Step: 2318... Training loss: 1.2658... 0.1281 sec/batch\n", "Epoch: 5/20... Training Step: 2319... Training loss: 1.4098... 0.1212 sec/batch\n", "Epoch: 5/20... Training Step: 2320... Training loss: 1.2009... 0.1197 sec/batch\n", "Epoch: 6/20... Training Step: 2321... Training loss: 1.6311... 0.1237 sec/batch\n", "Epoch: 6/20... Training Step: 2322... Training loss: 1.3930... 0.1246 sec/batch\n", "Epoch: 6/20... Training Step: 2323... Training loss: 1.3058... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2324... Training loss: 1.2827... 0.1264 sec/batch\n", "Epoch: 6/20... Training Step: 2325... Training loss: 1.3382... 0.1179 sec/batch\n", "Epoch: 6/20... Training Step: 2326... Training loss: 1.1043... 
0.1229 sec/batch\n", "Epoch: 6/20... Training Step: 2327... Training loss: 1.4317... 0.1271 sec/batch\n", "Epoch: 6/20... Training Step: 2328... Training loss: 1.1889... 0.1190 sec/batch\n", "Epoch: 6/20... Training Step: 2329... Training loss: 1.2613... 0.1216 sec/batch\n", "Epoch: 6/20... Training Step: 2330... Training loss: 1.3218... 0.1193 sec/batch\n", "Epoch: 6/20... Training Step: 2331... Training loss: 1.2366... 0.1186 sec/batch\n", "Epoch: 6/20... Training Step: 2332... Training loss: 1.0599... 0.1200 sec/batch\n", "Epoch: 6/20... Training Step: 2333... Training loss: 1.4858... 0.1217 sec/batch\n", "Epoch: 6/20... Training Step: 2334... Training loss: 1.0154... 0.1261 sec/batch\n", "Epoch: 6/20... Training Step: 2335... Training loss: 1.3003... 0.1286 sec/batch\n", "Epoch: 6/20... Training Step: 2336... Training loss: 1.3698... 0.1231 sec/batch\n", "Epoch: 6/20... Training Step: 2337... Training loss: 1.1545... 0.1205 sec/batch\n", "Epoch: 6/20... Training Step: 2338... Training loss: 1.1700... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2339... Training loss: 1.2990... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2340... Training loss: 1.1251... 0.1248 sec/batch\n", "Epoch: 6/20... Training Step: 2341... Training loss: 1.3312... 0.1210 sec/batch\n", "Epoch: 6/20... Training Step: 2342... Training loss: 1.2236... 0.1220 sec/batch\n", "Epoch: 6/20... Training Step: 2343... Training loss: 1.4102... 0.1248 sec/batch\n", "Epoch: 6/20... Training Step: 2344... Training loss: 1.1989... 0.1256 sec/batch\n", "Epoch: 6/20... Training Step: 2345... Training loss: 1.2348... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2346... Training loss: 1.3191... 0.1217 sec/batch\n", "Epoch: 6/20... Training Step: 2347... Training loss: 1.3903... 0.1200 sec/batch\n", "Epoch: 6/20... Training Step: 2348... Training loss: 1.0599... 0.1242 sec/batch\n", "Epoch: 6/20... Training Step: 2349... Training loss: 1.2737... 0.1183 sec/batch\n", "Epoch: 6/20... 
Training Step: 2350... Training loss: 1.2724... 0.1209 sec/batch\n", "Epoch: 6/20... Training Step: 2351... Training loss: 1.0888... 0.1266 sec/batch\n", "Epoch: 6/20... Training Step: 2352... Training loss: 1.2256... 0.1196 sec/batch\n", "Epoch: 6/20... Training Step: 2353... Training loss: 1.0456... 0.1243 sec/batch\n", "Epoch: 6/20... Training Step: 2354... Training loss: 1.0945... 0.1174 sec/batch\n", "Epoch: 6/20... Training Step: 2355... Training loss: 1.1539... 0.1204 sec/batch\n", "Epoch: 6/20... Training Step: 2356... Training loss: 1.2595... 0.1191 sec/batch\n", "Epoch: 6/20... Training Step: 2357... Training loss: 1.2769... 0.1265 sec/batch\n", "Epoch: 6/20... Training Step: 2358... Training loss: 1.0836... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2359... Training loss: 1.1771... 0.1263 sec/batch\n", "Epoch: 6/20... Training Step: 2360... Training loss: 1.4154... 0.1202 sec/batch\n", "Epoch: 6/20... Training Step: 2361... Training loss: 1.3142... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2362... Training loss: 1.1953... 0.1244 sec/batch\n", "Epoch: 6/20... Training Step: 2363... Training loss: 1.3591... 0.1226 sec/batch\n", "Epoch: 6/20... Training Step: 2364... Training loss: 1.0060... 0.1268 sec/batch\n", "Epoch: 6/20... Training Step: 2365... Training loss: 1.2036... 0.1237 sec/batch\n", "Epoch: 6/20... Training Step: 2366... Training loss: 1.1471... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2367... Training loss: 1.2212... 0.1229 sec/batch\n", "Epoch: 6/20... Training Step: 2368... Training loss: 1.2131... 0.1190 sec/batch\n", "Epoch: 6/20... Training Step: 2369... Training loss: 1.2261... 0.1263 sec/batch\n", "Epoch: 6/20... Training Step: 2370... Training loss: 1.3443... 0.1243 sec/batch\n", "Epoch: 6/20... Training Step: 2371... Training loss: 1.2670... 0.1235 sec/batch\n", "Epoch: 6/20... Training Step: 2372... Training loss: 1.2596... 0.1217 sec/batch\n", "Epoch: 6/20... Training Step: 2373... 
Training loss: 1.4464... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2374... Training loss: 1.4029... 0.1227 sec/batch\n", "Epoch: 6/20... Training Step: 2375... Training loss: 1.0272... 0.1220 sec/batch\n", "Epoch: 6/20... Training Step: 2376... Training loss: 1.1753... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2377... Training loss: 1.3865... 0.1190 sec/batch\n", "Epoch: 6/20... Training Step: 2378... Training loss: 1.3641... 0.1202 sec/batch\n", "Epoch: 6/20... Training Step: 2379... Training loss: 1.0820... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2380... Training loss: 1.1591... 0.1218 sec/batch\n", "Epoch: 6/20... Training Step: 2381... Training loss: 1.1741... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2382... Training loss: 1.4185... 0.1227 sec/batch\n", "Epoch: 6/20... Training Step: 2383... Training loss: 1.2276... 0.1212 sec/batch\n", "Epoch: 6/20... Training Step: 2384... Training loss: 1.2727... 0.1269 sec/batch\n", "Epoch: 6/20... Training Step: 2385... Training loss: 1.0896... 0.1365 sec/batch\n", "Epoch: 6/20... Training Step: 2386... Training loss: 1.4431... 0.1241 sec/batch\n", "Epoch: 6/20... Training Step: 2387... Training loss: 1.2559... 0.1264 sec/batch\n", "Epoch: 6/20... Training Step: 2388... Training loss: 1.3154... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2389... Training loss: 1.1405... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2390... Training loss: 1.2699... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2391... Training loss: 1.4147... 0.1253 sec/batch\n", "Epoch: 6/20... Training Step: 2392... Training loss: 1.2288... 0.1194 sec/batch\n", "Epoch: 6/20... Training Step: 2393... Training loss: 1.3025... 0.1268 sec/batch\n", "Epoch: 6/20... Training Step: 2394... Training loss: 1.1623... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2395... Training loss: 1.4626... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2396... Training loss: 1.1778... 
0.1247 sec/batch\n", "Epoch: 6/20... Training Step: 2397... Training loss: 1.1353... 0.1226 sec/batch\n", "Epoch: 6/20... Training Step: 2398... Training loss: 1.2984... 0.1285 sec/batch\n", "Epoch: 6/20... Training Step: 2399... Training loss: 1.2978... 0.1203 sec/batch\n", "Epoch: 6/20... Training Step: 2400... Training loss: 1.1298... 0.1199 sec/batch\n", "Epoch: 6/20... Training Step: 2401... Training loss: 1.4595... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2402... Training loss: 1.2626... 0.1208 sec/batch\n", "Epoch: 6/20... Training Step: 2403... Training loss: 1.2375... 0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2404... Training loss: 1.4104... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2405... Training loss: 1.2687... 0.1203 sec/batch\n", "Epoch: 6/20... Training Step: 2406... Training loss: 1.2620... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2407... Training loss: 1.1840... 0.1194 sec/batch\n", "Epoch: 6/20... Training Step: 2408... Training loss: 1.3223... 0.1195 sec/batch\n", "Epoch: 6/20... Training Step: 2409... Training loss: 1.4587... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2410... Training loss: 1.2219... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2411... Training loss: 1.3596... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2412... Training loss: 1.5247... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2413... Training loss: 1.0828... 0.1264 sec/batch\n", "Epoch: 6/20... Training Step: 2414... Training loss: 1.2971... 0.1184 sec/batch\n", "Epoch: 6/20... Training Step: 2415... Training loss: 1.3302... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2416... Training loss: 1.2316... 0.1236 sec/batch\n", "Epoch: 6/20... Training Step: 2417... Training loss: 1.5149... 0.1278 sec/batch\n", "Epoch: 6/20... Training Step: 2418... Training loss: 1.3349... 0.1216 sec/batch\n", "Epoch: 6/20... Training Step: 2419... Training loss: 1.3305... 0.1198 sec/batch\n", "Epoch: 6/20... 
Training Step: 2420... Training loss: 1.1834... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2421... Training loss: 1.3429... 0.1213 sec/batch\n", "Epoch: 6/20... Training Step: 2422... Training loss: 1.5014... 0.1210 sec/batch\n", "Epoch: 6/20... Training Step: 2423... Training loss: 1.4785... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2424... Training loss: 1.2383... 0.1231 sec/batch\n", "Epoch: 6/20... Training Step: 2425... Training loss: 1.4800... 0.1201 sec/batch\n", "Epoch: 6/20... Training Step: 2426... Training loss: 1.5512... 0.1184 sec/batch\n", "Epoch: 6/20... Training Step: 2427... Training loss: 1.4062... 0.1216 sec/batch\n", "Epoch: 6/20... Training Step: 2428... Training loss: 1.4362... 0.1213 sec/batch\n", "Epoch: 6/20... Training Step: 2429... Training loss: 1.4424... 0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2430... Training loss: 1.2157... 0.1208 sec/batch\n", "Epoch: 6/20... Training Step: 2431... Training loss: 1.3395... 0.1193 sec/batch\n", "Epoch: 6/20... Training Step: 2432... Training loss: 1.2810... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2433... Training loss: 1.3109... 0.1221 sec/batch\n", "Epoch: 6/20... Training Step: 2434... Training loss: 1.5148... 0.1247 sec/batch\n", "Epoch: 6/20... Training Step: 2435... Training loss: 1.3909... 0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2436... Training loss: 1.1984... 0.1239 sec/batch\n", "Epoch: 6/20... Training Step: 2437... Training loss: 1.4642... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2438... Training loss: 1.4342... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2439... Training loss: 1.3017... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2440... Training loss: 1.1203... 0.1204 sec/batch\n", "Epoch: 6/20... Training Step: 2441... Training loss: 1.3539... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2442... Training loss: 1.3851... 0.1246 sec/batch\n", "Epoch: 6/20... Training Step: 2443... 
Training loss: 1.3193... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2444... Training loss: 1.5250... 0.1198 sec/batch\n", "Epoch: 6/20... Training Step: 2445... Training loss: 1.3380... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2446... Training loss: 1.1748... 0.1196 sec/batch\n", "Epoch: 6/20... Training Step: 2447... Training loss: 1.2455... 0.1241 sec/batch\n", "Epoch: 6/20... Training Step: 2448... Training loss: 1.3589... 0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2449... Training loss: 1.3846... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2450... Training loss: 1.2957... 0.1212 sec/batch\n", "Epoch: 6/20... Training Step: 2451... Training loss: 1.4721... 0.1262 sec/batch\n", "Epoch: 6/20... Training Step: 2452... Training loss: 1.3400... 0.1339 sec/batch\n", "Epoch: 6/20... Training Step: 2453... Training loss: 1.3076... 0.1217 sec/batch\n", "Epoch: 6/20... Training Step: 2454... Training loss: 1.4066... 0.1233 sec/batch\n", "Epoch: 6/20... Training Step: 2455... Training loss: 1.2339... 0.1223 sec/batch\n", "Epoch: 6/20... Training Step: 2456... Training loss: 1.0549... 0.1212 sec/batch\n", "Epoch: 6/20... Training Step: 2457... Training loss: 1.1928... 0.1267 sec/batch\n", "Epoch: 6/20... Training Step: 2458... Training loss: 1.3019... 0.1396 sec/batch\n", "Epoch: 6/20... Training Step: 2459... Training loss: 1.2046... 0.1308 sec/batch\n", "Epoch: 6/20... Training Step: 2460... Training loss: 1.2998... 0.1185 sec/batch\n", "Epoch: 6/20... Training Step: 2461... Training loss: 1.2088... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2462... Training loss: 1.2659... 0.1196 sec/batch\n", "Epoch: 6/20... Training Step: 2463... Training loss: 1.1465... 0.1244 sec/batch\n", "Epoch: 6/20... Training Step: 2464... Training loss: 1.3121... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2465... Training loss: 1.2393... 0.1201 sec/batch\n", "Epoch: 6/20... Training Step: 2466... Training loss: 1.2197... 
0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2467... Training loss: 1.3598... 0.1209 sec/batch\n", "Epoch: 6/20... Training Step: 2468... Training loss: 1.1963... 0.1239 sec/batch\n", "Epoch: 6/20... Training Step: 2469... Training loss: 1.2691... 0.1231 sec/batch\n", "Epoch: 6/20... Training Step: 2470... Training loss: 1.3611... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2471... Training loss: 1.3896... 0.1249 sec/batch\n", "Epoch: 6/20... Training Step: 2472... Training loss: 1.3983... 0.1246 sec/batch\n", "Epoch: 6/20... Training Step: 2473... Training loss: 1.4176... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2474... Training loss: 1.3656... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2475... Training loss: 1.2919... 0.1249 sec/batch\n", "Epoch: 6/20... Training Step: 2476... Training loss: 1.2340... 0.1223 sec/batch\n", "Epoch: 6/20... Training Step: 2477... Training loss: 1.2620... 0.1194 sec/batch\n", "Epoch: 6/20... Training Step: 2478... Training loss: 1.1818... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2479... Training loss: 1.2103... 0.1232 sec/batch\n", "Epoch: 6/20... Training Step: 2480... Training loss: 1.2561... 0.1249 sec/batch\n", "Epoch: 6/20... Training Step: 2481... Training loss: 1.4544... 0.1183 sec/batch\n", "Epoch: 6/20... Training Step: 2482... Training loss: 1.3303... 0.1190 sec/batch\n", "Epoch: 6/20... Training Step: 2483... Training loss: 1.3413... 0.1236 sec/batch\n", "Epoch: 6/20... Training Step: 2484... Training loss: 1.2096... 0.1217 sec/batch\n", "Epoch: 6/20... Training Step: 2485... Training loss: 1.3481... 0.1235 sec/batch\n", "Epoch: 6/20... Training Step: 2486... Training loss: 1.2442... 0.1239 sec/batch\n", "Epoch: 6/20... Training Step: 2487... Training loss: 1.2041... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2488... Training loss: 1.5734... 0.1204 sec/batch\n", "Epoch: 6/20... Training Step: 2489... Training loss: 1.2203... 0.1228 sec/batch\n", "Epoch: 6/20... 
Training Step: 2490... Training loss: 1.3087... 0.1199 sec/batch\n", "Epoch: 6/20... Training Step: 2491... Training loss: 1.4068... 0.1152 sec/batch\n", "Epoch: 6/20... Training Step: 2492... Training loss: 1.4106... 0.1196 sec/batch\n", "Epoch: 6/20... Training Step: 2493... Training loss: 1.1642... 0.1206 sec/batch\n", "Epoch: 6/20... Training Step: 2494... Training loss: 1.2704... 0.1223 sec/batch\n", "Epoch: 6/20... Training Step: 2495... Training loss: 1.3372... 0.1202 sec/batch\n", "Epoch: 6/20... Training Step: 2496... Training loss: 1.1189... 0.1197 sec/batch\n", "Epoch: 6/20... Training Step: 2497... Training loss: 1.0800... 0.1204 sec/batch\n", "Epoch: 6/20... Training Step: 2498... Training loss: 1.4059... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2499... Training loss: 1.1671... 0.1212 sec/batch\n", "Epoch: 6/20... Training Step: 2500... Training loss: 1.4284... 0.1183 sec/batch\n", "Epoch: 6/20... Training Step: 2501... Training loss: 1.1956... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2502... Training loss: 1.3876... 0.1196 sec/batch\n", "Epoch: 6/20... Training Step: 2503... Training loss: 1.2569... 0.1173 sec/batch\n", "Epoch: 6/20... Training Step: 2504... Training loss: 1.2678... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2505... Training loss: 1.4282... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2506... Training loss: 1.3326... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2507... Training loss: 1.4063... 0.1292 sec/batch\n", "Epoch: 6/20... Training Step: 2508... Training loss: 1.1530... 0.1300 sec/batch\n", "Epoch: 6/20... Training Step: 2509... Training loss: 1.4763... 0.1298 sec/batch\n", "Epoch: 6/20... Training Step: 2510... Training loss: 1.1846... 0.1266 sec/batch\n", "Epoch: 6/20... Training Step: 2511... Training loss: 1.2322... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2512... Training loss: 1.2969... 0.1256 sec/batch\n", "Epoch: 6/20... Training Step: 2513... 
Training loss: 1.2889... 0.1277 sec/batch\n", "Epoch: 6/20... Training Step: 2514... Training loss: 1.3047... 0.1170 sec/batch\n", "Epoch: 6/20... Training Step: 2515... Training loss: 1.4125... 0.1248 sec/batch\n", "Epoch: 6/20... Training Step: 2516... Training loss: 1.2930... 0.1305 sec/batch\n", "Epoch: 6/20... Training Step: 2517... Training loss: 1.2578... 0.1283 sec/batch\n", "Epoch: 6/20... Training Step: 2518... Training loss: 1.4139... 0.1359 sec/batch\n", "Epoch: 6/20... Training Step: 2519... Training loss: 1.0733... 0.1243 sec/batch\n", "Epoch: 6/20... Training Step: 2520... Training loss: 1.2980... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2521... Training loss: 1.2366... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2522... Training loss: 1.3186... 0.1210 sec/batch\n", "Epoch: 6/20... Training Step: 2523... Training loss: 1.2487... 0.1212 sec/batch\n", "Epoch: 6/20... Training Step: 2524... Training loss: 1.4179... 0.1236 sec/batch\n", "Epoch: 6/20... Training Step: 2525... Training loss: 1.2025... 0.1221 sec/batch\n", "Epoch: 6/20... Training Step: 2526... Training loss: 1.2384... 0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2527... Training loss: 1.1733... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2528... Training loss: 1.4089... 0.1582 sec/batch\n", "Epoch: 6/20... Training Step: 2529... Training loss: 1.3204... 0.1262 sec/batch\n", "Epoch: 6/20... Training Step: 2530... Training loss: 1.0509... 0.1227 sec/batch\n", "Epoch: 6/20... Training Step: 2531... Training loss: 1.0744... 0.1190 sec/batch\n", "Epoch: 6/20... Training Step: 2532... Training loss: 1.3947... 0.1246 sec/batch\n", "Epoch: 6/20... Training Step: 2533... Training loss: 1.4841... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2534... Training loss: 1.2203... 0.1203 sec/batch\n", "Epoch: 6/20... Training Step: 2535... Training loss: 1.5004... 0.1235 sec/batch\n", "Epoch: 6/20... Training Step: 2536... Training loss: 1.2421... 
0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2537... Training loss: 1.3332... 0.1194 sec/batch\n", "Epoch: 6/20... Training Step: 2538... Training loss: 1.3127... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2539... Training loss: 1.4510... 0.1210 sec/batch\n", "Epoch: 6/20... Training Step: 2540... Training loss: 1.3265... 0.1195 sec/batch\n", "Epoch: 6/20... Training Step: 2541... Training loss: 1.2189... 0.1242 sec/batch\n", "Epoch: 6/20... Training Step: 2542... Training loss: 1.4283... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2543... Training loss: 1.4004... 0.1185 sec/batch\n", "Epoch: 6/20... Training Step: 2544... Training loss: 1.4495... 0.1283 sec/batch\n", "Epoch: 6/20... Training Step: 2545... Training loss: 1.3090... 0.1344 sec/batch\n", "Epoch: 6/20... Training Step: 2546... Training loss: 1.4558... 0.1292 sec/batch\n", "Epoch: 6/20... Training Step: 2547... Training loss: 1.5184... 0.1283 sec/batch\n", "Epoch: 6/20... Training Step: 2548... Training loss: 1.1978... 0.1278 sec/batch\n", "Epoch: 6/20... Training Step: 2549... Training loss: 1.3501... 0.1223 sec/batch\n", "Epoch: 6/20... Training Step: 2550... Training loss: 1.3034... 0.1223 sec/batch\n", "Epoch: 6/20... Training Step: 2551... Training loss: 1.2627... 0.1209 sec/batch\n", "Epoch: 6/20... Training Step: 2552... Training loss: 1.2194... 0.1185 sec/batch\n", "Epoch: 6/20... Training Step: 2553... Training loss: 1.5413... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2554... Training loss: 1.2447... 0.1181 sec/batch\n", "Epoch: 6/20... Training Step: 2555... Training loss: 1.5604... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2556... Training loss: 1.2957... 0.1220 sec/batch\n", "Epoch: 6/20... Training Step: 2557... Training loss: 1.5167... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2558... Training loss: 1.1945... 0.1204 sec/batch\n", "Epoch: 6/20... Training Step: 2559... Training loss: 1.4043... 0.1209 sec/batch\n", "Epoch: 6/20... 
Training Step: 2560... Training loss: 1.5053... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2561... Training loss: 1.2642... 0.1262 sec/batch\n", "Epoch: 6/20... Training Step: 2562... Training loss: 1.2124... 0.1272 sec/batch\n", "Epoch: 6/20... Training Step: 2563... Training loss: 1.3084... 0.1257 sec/batch\n", "Epoch: 6/20... Training Step: 2564... Training loss: 1.3336... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2565... Training loss: 1.3394... 0.1226 sec/batch\n", "Epoch: 6/20... Training Step: 2566... Training loss: 1.1771... 0.1193 sec/batch\n", "Epoch: 6/20... Training Step: 2567... Training loss: 1.2187... 0.1249 sec/batch\n", "Epoch: 6/20... Training Step: 2568... Training loss: 1.4689... 0.1232 sec/batch\n", "Epoch: 6/20... Training Step: 2569... Training loss: 1.2879... 0.1247 sec/batch\n", "Epoch: 6/20... Training Step: 2570... Training loss: 1.2866... 0.1201 sec/batch\n", "Epoch: 6/20... Training Step: 2571... Training loss: 1.4767... 0.1183 sec/batch\n", "Epoch: 6/20... Training Step: 2572... Training loss: 1.3109... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2573... Training loss: 1.2209... 0.1252 sec/batch\n", "Epoch: 6/20... Training Step: 2574... Training loss: 1.2947... 0.1252 sec/batch\n", "Epoch: 6/20... Training Step: 2575... Training loss: 1.2359... 0.1206 sec/batch\n", "Epoch: 6/20... Training Step: 2576... Training loss: 1.2758... 0.1251 sec/batch\n", "Epoch: 6/20... Training Step: 2577... Training loss: 1.3215... 0.1220 sec/batch\n", "Epoch: 6/20... Training Step: 2578... Training loss: 1.2118... 0.1233 sec/batch\n", "Epoch: 6/20... Training Step: 2579... Training loss: 1.1892... 0.1229 sec/batch\n", "Epoch: 6/20... Training Step: 2580... Training loss: 1.2476... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2581... Training loss: 1.3397... 0.1201 sec/batch\n", "Epoch: 6/20... Training Step: 2582... Training loss: 1.3370... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2583... 
Training loss: 1.2363... 0.1184 sec/batch\n", "Epoch: 6/20... Training Step: 2584... Training loss: 1.4695... 0.1232 sec/batch\n", "Epoch: 6/20... Training Step: 2585... Training loss: 1.4374... 0.1187 sec/batch\n", "Epoch: 6/20... Training Step: 2586... Training loss: 1.3395... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2587... Training loss: 1.5199... 0.1237 sec/batch\n", "Epoch: 6/20... Training Step: 2588... Training loss: 1.4206... 0.1168 sec/batch\n", "Epoch: 6/20... Training Step: 2589... Training loss: 1.3456... 0.1227 sec/batch\n", "Epoch: 6/20... Training Step: 2590... Training loss: 1.5636... 0.1204 sec/batch\n", "Epoch: 6/20... Training Step: 2591... Training loss: 1.4367... 0.1188 sec/batch\n", "Epoch: 6/20... Training Step: 2592... Training loss: 1.5237... 0.1191 sec/batch\n", "Epoch: 6/20... Training Step: 2593... Training loss: 1.4519... 0.1200 sec/batch\n", "Epoch: 6/20... Training Step: 2594... Training loss: 1.3393... 0.1347 sec/batch\n", "Epoch: 6/20... Training Step: 2595... Training loss: 1.3891... 0.1256 sec/batch\n", "Epoch: 6/20... Training Step: 2596... Training loss: 1.3778... 0.1285 sec/batch\n", "Epoch: 6/20... Training Step: 2597... Training loss: 1.3558... 0.1312 sec/batch\n", "Epoch: 6/20... Training Step: 2598... Training loss: 1.5473... 0.1301 sec/batch\n", "Epoch: 6/20... Training Step: 2599... Training loss: 1.2889... 0.1273 sec/batch\n", "Epoch: 6/20... Training Step: 2600... Training loss: 1.2634... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2601... Training loss: 1.2807... 0.1258 sec/batch\n", "Epoch: 6/20... Training Step: 2602... Training loss: 1.2044... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2603... Training loss: 1.2936... 0.1293 sec/batch\n", "Epoch: 6/20... Training Step: 2604... Training loss: 1.4126... 0.1272 sec/batch\n", "Epoch: 6/20... Training Step: 2605... Training loss: 1.2088... 0.1256 sec/batch\n", "Epoch: 6/20... Training Step: 2606... Training loss: 1.3395... 
0.1251 sec/batch\n", "Epoch: 6/20... Training Step: 2607... Training loss: 1.3479... 0.1249 sec/batch\n", "Epoch: 6/20... Training Step: 2608... Training loss: 1.3363... 0.1241 sec/batch\n", "Epoch: 6/20... Training Step: 2609... Training loss: 1.3678... 0.1269 sec/batch\n", "Epoch: 6/20... Training Step: 2610... Training loss: 1.2880... 0.1178 sec/batch\n", "Epoch: 6/20... Training Step: 2611... Training loss: 1.2258... 0.1240 sec/batch\n", "Epoch: 6/20... Training Step: 2612... Training loss: 1.2708... 0.1263 sec/batch\n", "Epoch: 6/20... Training Step: 2613... Training loss: 1.1810... 0.1246 sec/batch\n", "Epoch: 6/20... Training Step: 2614... Training loss: 1.3215... 0.1242 sec/batch\n", "Epoch: 6/20... Training Step: 2615... Training loss: 1.2815... 0.1235 sec/batch\n", "Epoch: 6/20... Training Step: 2616... Training loss: 1.6054... 0.1163 sec/batch\n", "Epoch: 6/20... Training Step: 2617... Training loss: 1.2212... 0.1251 sec/batch\n", "Epoch: 6/20... Training Step: 2618... Training loss: 1.2007... 0.1281 sec/batch\n", "Epoch: 6/20... Training Step: 2619... Training loss: 1.2158... 0.1251 sec/batch\n", "Epoch: 6/20... Training Step: 2620... Training loss: 1.1755... 0.1238 sec/batch\n", "Epoch: 6/20... Training Step: 2621... Training loss: 1.3614... 0.1213 sec/batch\n", "Epoch: 6/20... Training Step: 2622... Training loss: 1.2741... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2623... Training loss: 1.0992... 0.1220 sec/batch\n", "Epoch: 6/20... Training Step: 2624... Training loss: 1.4738... 0.1251 sec/batch\n", "Epoch: 6/20... Training Step: 2625... Training loss: 1.2304... 0.1324 sec/batch\n", "Epoch: 6/20... Training Step: 2626... Training loss: 1.3359... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2627... Training loss: 1.3527... 0.1223 sec/batch\n", "Epoch: 6/20... Training Step: 2628... Training loss: 1.5696... 0.1153 sec/batch\n", "Epoch: 6/20... Training Step: 2629... Training loss: 1.3618... 0.1155 sec/batch\n", "Epoch: 6/20... 
Training Step: 2630... Training loss: 1.3211... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2631... Training loss: 1.3071... 0.1198 sec/batch\n", "Epoch: 6/20... Training Step: 2632... Training loss: 1.2305... 0.1288 sec/batch\n", "Epoch: 6/20... Training Step: 2633... Training loss: 1.2516... 0.1230 sec/batch\n", "Epoch: 6/20... Training Step: 2634... Training loss: 1.3357... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2635... Training loss: 1.0482... 0.1205 sec/batch\n", "Epoch: 6/20... Training Step: 2636... Training loss: 1.1613... 0.1237 sec/batch\n", "Epoch: 6/20... Training Step: 2637... Training loss: 1.0563... 0.1333 sec/batch\n", "Epoch: 6/20... Training Step: 2638... Training loss: 1.3082... 0.1238 sec/batch\n", "Epoch: 6/20... Training Step: 2639... Training loss: 1.2320... 0.1343 sec/batch\n", "Epoch: 6/20... Training Step: 2640... Training loss: 1.2189... 0.1269 sec/batch\n", "Epoch: 6/20... Training Step: 2641... Training loss: 1.1423... 0.1289 sec/batch\n", "Epoch: 6/20... Training Step: 2642... Training loss: 1.4718... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2643... Training loss: 1.2052... 0.1275 sec/batch\n", "Epoch: 6/20... Training Step: 2644... Training loss: 1.1737... 0.1191 sec/batch\n", "Epoch: 6/20... Training Step: 2645... Training loss: 1.1290... 0.1218 sec/batch\n", "Epoch: 6/20... Training Step: 2646... Training loss: 1.1563... 0.1336 sec/batch\n", "Epoch: 6/20... Training Step: 2647... Training loss: 1.2487... 0.1340 sec/batch\n", "Epoch: 6/20... Training Step: 2648... Training loss: 1.2252... 0.1251 sec/batch\n", "Epoch: 6/20... Training Step: 2649... Training loss: 1.2746... 0.1229 sec/batch\n", "Epoch: 6/20... Training Step: 2650... Training loss: 1.1701... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2651... Training loss: 1.2769... 0.1206 sec/batch\n", "Epoch: 6/20... Training Step: 2652... Training loss: 1.2191... 0.1215 sec/batch\n", "Epoch: 6/20... Training Step: 2653... 
Training loss: 1.1704... 0.1262 sec/batch\n", "Epoch: 6/20... Training Step: 2654... Training loss: 1.2015... 0.1232 sec/batch\n", "Epoch: 6/20... Training Step: 2655... Training loss: 1.4035... 0.1229 sec/batch\n", "Epoch: 6/20... Training Step: 2656... Training loss: 1.1808... 0.1194 sec/batch\n", "Epoch: 6/20... Training Step: 2657... Training loss: 1.2434... 0.1213 sec/batch\n", "Epoch: 6/20... Training Step: 2658... Training loss: 1.1943... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2659... Training loss: 1.3294... 0.1212 sec/batch\n", "Epoch: 6/20... Training Step: 2660... Training loss: 1.1892... 0.1229 sec/batch\n", "Epoch: 6/20... Training Step: 2661... Training loss: 1.1274... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2662... Training loss: 1.2657... 0.1130 sec/batch\n", "Epoch: 6/20... Training Step: 2663... Training loss: 1.0259... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2664... Training loss: 1.3320... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2665... Training loss: 1.1805... 0.1227 sec/batch\n", "Epoch: 6/20... Training Step: 2666... Training loss: 1.0993... 0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2667... Training loss: 1.0682... 0.1194 sec/batch\n", "Epoch: 6/20... Training Step: 2668... Training loss: 1.4938... 0.1202 sec/batch\n", "Epoch: 6/20... Training Step: 2669... Training loss: 1.2091... 0.1202 sec/batch\n", "Epoch: 6/20... Training Step: 2670... Training loss: 1.2793... 0.1198 sec/batch\n", "Epoch: 6/20... Training Step: 2671... Training loss: 1.2122... 0.1252 sec/batch\n", "Epoch: 6/20... Training Step: 2672... Training loss: 1.1276... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2673... Training loss: 1.0146... 0.1189 sec/batch\n", "Epoch: 6/20... Training Step: 2674... Training loss: 0.9861... 0.1251 sec/batch\n", "Epoch: 6/20... Training Step: 2675... Training loss: 1.2772... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2676... Training loss: 1.1507... 
0.1239 sec/batch\n", "Epoch: 6/20... Training Step: 2677... Training loss: 1.1612... 0.1197 sec/batch\n", "Epoch: 6/20... Training Step: 2678... Training loss: 1.3257... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2679... Training loss: 1.5169... 0.1244 sec/batch\n", "Epoch: 6/20... Training Step: 2680... Training loss: 1.0488... 0.1287 sec/batch\n", "Epoch: 6/20... Training Step: 2681... Training loss: 1.5007... 0.1309 sec/batch\n", "Epoch: 6/20... Training Step: 2682... Training loss: 1.1452... 0.1189 sec/batch\n", "Epoch: 6/20... Training Step: 2683... Training loss: 1.1118... 0.1197 sec/batch\n", "Epoch: 6/20... Training Step: 2684... Training loss: 1.1954... 0.1199 sec/batch\n", "Epoch: 6/20... Training Step: 2685... Training loss: 1.1546... 0.1231 sec/batch\n", "Epoch: 6/20... Training Step: 2686... Training loss: 1.3192... 0.1176 sec/batch\n", "Epoch: 6/20... Training Step: 2687... Training loss: 1.2011... 0.1250 sec/batch\n", "Epoch: 6/20... Training Step: 2688... Training loss: 1.4733... 0.1239 sec/batch\n", "Epoch: 6/20... Training Step: 2689... Training loss: 1.2262... 0.1239 sec/batch\n", "Epoch: 6/20... Training Step: 2690... Training loss: 1.1838... 0.1271 sec/batch\n", "Epoch: 6/20... Training Step: 2691... Training loss: 1.1633... 0.1272 sec/batch\n", "Epoch: 6/20... Training Step: 2692... Training loss: 1.2878... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2693... Training loss: 1.2619... 0.1254 sec/batch\n", "Epoch: 6/20... Training Step: 2694... Training loss: 1.3523... 0.1234 sec/batch\n", "Epoch: 6/20... Training Step: 2695... Training loss: 1.2359... 0.1233 sec/batch\n", "Epoch: 6/20... Training Step: 2696... Training loss: 1.2706... 0.1244 sec/batch\n", "Epoch: 6/20... Training Step: 2697... Training loss: 1.3521... 0.1233 sec/batch\n", "Epoch: 6/20... Training Step: 2698... Training loss: 1.3451... 0.1238 sec/batch\n", "Epoch: 6/20... Training Step: 2699... Training loss: 1.1686... 0.1206 sec/batch\n", "Epoch: 6/20... 
Training Step: 2700... Training loss: 1.3626... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2701... Training loss: 1.1392... 0.1194 sec/batch\n", "Epoch: 6/20... Training Step: 2702... Training loss: 1.1876... 0.1217 sec/batch\n", "Epoch: 6/20... Training Step: 2703... Training loss: 1.2004... 0.1248 sec/batch\n", "Epoch: 6/20... Training Step: 2704... Training loss: 1.2247... 0.1201 sec/batch\n", "Epoch: 6/20... Training Step: 2705... Training loss: 1.1215... 0.1227 sec/batch\n", "Epoch: 6/20... Training Step: 2706... Training loss: 1.2892... 0.1206 sec/batch\n", "Epoch: 6/20... Training Step: 2707... Training loss: 1.2517... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2708... Training loss: 1.0285... 0.1183 sec/batch\n", "Epoch: 6/20... Training Step: 2709... Training loss: 1.0774... 0.1257 sec/batch\n", "Epoch: 6/20... Training Step: 2710... Training loss: 1.2098... 0.1220 sec/batch\n", "Epoch: 6/20... Training Step: 2711... Training loss: 1.0020... 0.1178 sec/batch\n", "Epoch: 6/20... Training Step: 2712... Training loss: 1.0795... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2713... Training loss: 1.0765... 0.1208 sec/batch\n", "Epoch: 6/20... Training Step: 2714... Training loss: 1.2854... 0.1189 sec/batch\n", "Epoch: 6/20... Training Step: 2715... Training loss: 1.1075... 0.1223 sec/batch\n", "Epoch: 6/20... Training Step: 2716... Training loss: 1.3624... 0.1243 sec/batch\n", "Epoch: 6/20... Training Step: 2717... Training loss: 1.1206... 0.1196 sec/batch\n", "Epoch: 6/20... Training Step: 2718... Training loss: 1.1915... 0.1262 sec/batch\n", "Epoch: 6/20... Training Step: 2719... Training loss: 1.1577... 0.1227 sec/batch\n", "Epoch: 6/20... Training Step: 2720... Training loss: 1.1904... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2721... Training loss: 1.1397... 0.1194 sec/batch\n", "Epoch: 6/20... Training Step: 2722... Training loss: 1.1919... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2723... 
Training loss: 1.1319... 0.1285 sec/batch\n", "Epoch: 6/20... Training Step: 2724... Training loss: 1.3126... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2725... Training loss: 1.2256... 0.1232 sec/batch\n", "Epoch: 6/20... Training Step: 2726... Training loss: 1.4587... 0.1203 sec/batch\n", "Epoch: 6/20... Training Step: 2727... Training loss: 1.2153... 0.1263 sec/batch\n", "Epoch: 6/20... Training Step: 2728... Training loss: 1.4818... 0.1203 sec/batch\n", "Epoch: 6/20... Training Step: 2729... Training loss: 1.3395... 0.1196 sec/batch\n", "Epoch: 6/20... Training Step: 2730... Training loss: 1.2651... 0.1245 sec/batch\n", "Epoch: 6/20... Training Step: 2731... Training loss: 1.1330... 0.1297 sec/batch\n", "Epoch: 6/20... Training Step: 2732... Training loss: 1.3653... 0.1308 sec/batch\n", "Epoch: 6/20... Training Step: 2733... Training loss: 1.3198... 0.1308 sec/batch\n", "Epoch: 6/20... Training Step: 2734... Training loss: 1.3021... 0.1338 sec/batch\n", "Epoch: 6/20... Training Step: 2735... Training loss: 1.5001... 0.1254 sec/batch\n", "Epoch: 6/20... Training Step: 2736... Training loss: 1.4025... 0.1309 sec/batch\n", "Epoch: 6/20... Training Step: 2737... Training loss: 1.3685... 0.1307 sec/batch\n", "Epoch: 6/20... Training Step: 2738... Training loss: 1.2107... 0.1322 sec/batch\n", "Epoch: 6/20... Training Step: 2739... Training loss: 1.2744... 0.1286 sec/batch\n", "Epoch: 6/20... Training Step: 2740... Training loss: 1.0829... 0.1244 sec/batch\n", "Epoch: 6/20... Training Step: 2741... Training loss: 1.4387... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2742... Training loss: 1.2008... 0.1229 sec/batch\n", "Epoch: 6/20... Training Step: 2743... Training loss: 1.3727... 0.1290 sec/batch\n", "Epoch: 6/20... Training Step: 2744... Training loss: 1.4844... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2745... Training loss: 1.2374... 0.1259 sec/batch\n", "Epoch: 6/20... Training Step: 2746... Training loss: 1.2671... 
0.1257 sec/batch\n", "Epoch: 6/20... Training Step: 2747... Training loss: 1.3834... 0.1239 sec/batch\n", "Epoch: 6/20... Training Step: 2748... Training loss: 1.3830... 0.1199 sec/batch\n", "Epoch: 6/20... Training Step: 2749... Training loss: 1.1991... 0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2750... Training loss: 1.2974... 0.1214 sec/batch\n", "Epoch: 6/20... Training Step: 2751... Training loss: 1.5204... 0.1175 sec/batch\n", "Epoch: 6/20... Training Step: 2752... Training loss: 1.2199... 0.1257 sec/batch\n", "Epoch: 6/20... Training Step: 2753... Training loss: 1.6371... 0.1263 sec/batch\n", "Epoch: 6/20... Training Step: 2754... Training loss: 1.4243... 0.1258 sec/batch\n", "Epoch: 6/20... Training Step: 2755... Training loss: 1.2148... 0.1185 sec/batch\n", "Epoch: 6/20... Training Step: 2756... Training loss: 1.3230... 0.1256 sec/batch\n", "Epoch: 6/20... Training Step: 2757... Training loss: 1.2326... 0.1266 sec/batch\n", "Epoch: 6/20... Training Step: 2758... Training loss: 1.4747... 0.1204 sec/batch\n", "Epoch: 6/20... Training Step: 2759... Training loss: 1.4895... 0.1261 sec/batch\n", "Epoch: 6/20... Training Step: 2760... Training loss: 1.6230... 0.1221 sec/batch\n", "Epoch: 6/20... Training Step: 2761... Training loss: 1.2875... 0.1209 sec/batch\n", "Epoch: 6/20... Training Step: 2762... Training loss: 1.2346... 0.1221 sec/batch\n", "Epoch: 6/20... Training Step: 2763... Training loss: 1.3268... 0.1191 sec/batch\n", "Epoch: 6/20... Training Step: 2764... Training loss: 1.2308... 0.1207 sec/batch\n", "Epoch: 6/20... Training Step: 2765... Training loss: 1.3149... 0.1254 sec/batch\n", "Epoch: 6/20... Training Step: 2766... Training loss: 1.1606... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2767... Training loss: 1.2550... 0.1219 sec/batch\n", "Epoch: 6/20... Training Step: 2768... Training loss: 1.1248... 0.1224 sec/batch\n", "Epoch: 6/20... Training Step: 2769... Training loss: 1.2580... 0.1248 sec/batch\n", "Epoch: 6/20... 
Training Step: 2770... Training loss: 1.3312... 0.1228 sec/batch\n", "Epoch: 6/20... Training Step: 2771... Training loss: 1.3274... 0.1232 sec/batch\n", "Epoch: 6/20... Training Step: 2772... Training loss: 1.2951... 0.1216 sec/batch\n", "Epoch: 6/20... Training Step: 2773... Training loss: 1.1720... 0.1265 sec/batch\n", "Epoch: 6/20... Training Step: 2774... Training loss: 1.5893... 0.1210 sec/batch\n", "Epoch: 6/20... Training Step: 2775... Training loss: 1.4110... 0.1226 sec/batch\n", "Epoch: 6/20... Training Step: 2776... Training loss: 1.1963... 0.1227 sec/batch\n", "Epoch: 6/20... Training Step: 2777... Training loss: 1.1328... 0.1239 sec/batch\n", "Epoch: 6/20... Training Step: 2778... Training loss: 1.0506... 0.1225 sec/batch\n", "Epoch: 6/20... Training Step: 2779... Training loss: 1.0907... 0.1211 sec/batch\n", "Epoch: 6/20... Training Step: 2780... Training loss: 1.2546... 0.1198 sec/batch\n", "Epoch: 6/20... Training Step: 2781... Training loss: 1.4092... 0.1238 sec/batch\n", "Epoch: 6/20... Training Step: 2782... Training loss: 1.1441... 0.1231 sec/batch\n", "Epoch: 6/20... Training Step: 2783... Training loss: 1.2670... 0.1222 sec/batch\n", "Epoch: 6/20... Training Step: 2784... Training loss: 1.2103... 0.1212 sec/batch\n", "Epoch: 7/20... Training Step: 2785... Training loss: 1.6022... 0.1177 sec/batch\n", "Epoch: 7/20... Training Step: 2786... Training loss: 1.2978... 0.1223 sec/batch\n", "Epoch: 7/20... Training Step: 2787... Training loss: 1.2757... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 2788... Training loss: 1.2762... 0.1234 sec/batch\n", "Epoch: 7/20... Training Step: 2789... Training loss: 1.3186... 0.1196 sec/batch\n", "Epoch: 7/20... Training Step: 2790... Training loss: 1.1030... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2791... Training loss: 1.4062... 0.1198 sec/batch\n", "Epoch: 7/20... Training Step: 2792... Training loss: 1.2127... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 2793... 
Training loss: 1.0612... 0.1180 sec/batch\n", "Epoch: 7/20... Training Step: 2794... Training loss: 1.3288... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 2795... Training loss: 1.1410... 0.1246 sec/batch\n", "Epoch: 7/20... Training Step: 2796... Training loss: 1.0711... 0.1205 sec/batch\n", "Epoch: 7/20... Training Step: 2797... Training loss: 1.4178... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 2798... Training loss: 1.0990... 0.1226 sec/batch\n", "Epoch: 7/20... Training Step: 2799... Training loss: 1.2992... 0.1213 sec/batch\n", "Epoch: 7/20... Training Step: 2800... Training loss: 1.3827... 0.1229 sec/batch\n", "Epoch: 7/20... Training Step: 2801... Training loss: 1.2046... 0.1240 sec/batch\n", "Epoch: 7/20... Training Step: 2802... Training loss: 1.0667... 0.1220 sec/batch\n", "Epoch: 7/20... Training Step: 2803... Training loss: 1.2391... 0.1273 sec/batch\n", "Epoch: 7/20... Training Step: 2804... Training loss: 1.0837... 0.1251 sec/batch\n", "Epoch: 7/20... Training Step: 2805... Training loss: 1.3235... 0.1246 sec/batch\n", "Epoch: 7/20... Training Step: 2806... Training loss: 1.1864... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2807... Training loss: 1.4851... 0.1232 sec/batch\n", "Epoch: 7/20... Training Step: 2808... Training loss: 1.1897... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 2809... Training loss: 1.1682... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 2810... Training loss: 1.2310... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 2811... Training loss: 1.3416... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 2812... Training loss: 1.0823... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 2813... Training loss: 1.1574... 0.1211 sec/batch\n", "Epoch: 7/20... Training Step: 2814... Training loss: 1.2997... 0.1232 sec/batch\n", "Epoch: 7/20... Training Step: 2815... Training loss: 1.0314... 0.1267 sec/batch\n", "Epoch: 7/20... Training Step: 2816... Training loss: 1.1368... 
0.1215 sec/batch\n", "Epoch: 7/20... Training Step: 2817... Training loss: 1.0489... 0.1185 sec/batch\n", "Epoch: 7/20... Training Step: 2818... Training loss: 1.1406... 0.1197 sec/batch\n", "Epoch: 7/20... Training Step: 2819... Training loss: 1.1123... 0.1178 sec/batch\n", "Epoch: 7/20... Training Step: 2820... Training loss: 1.1562... 0.1204 sec/batch\n", "Epoch: 7/20... Training Step: 2821... Training loss: 1.3419... 0.1194 sec/batch\n", "Epoch: 7/20... Training Step: 2822... Training loss: 1.1514... 0.1193 sec/batch\n", "Epoch: 7/20... Training Step: 2823... Training loss: 1.1650... 0.1216 sec/batch\n", "Epoch: 7/20... Training Step: 2824... Training loss: 1.4537... 0.1186 sec/batch\n", "Epoch: 7/20... Training Step: 2825... Training loss: 1.2162... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 2826... Training loss: 1.0869... 0.1190 sec/batch\n", "Epoch: 7/20... Training Step: 2827... Training loss: 1.3064... 0.1227 sec/batch\n", "Epoch: 7/20... Training Step: 2828... Training loss: 1.0138... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2829... Training loss: 1.2151... 0.1217 sec/batch\n", "Epoch: 7/20... Training Step: 2830... Training loss: 1.1558... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 2831... Training loss: 1.2438... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 2832... Training loss: 1.2028... 0.1242 sec/batch\n", "Epoch: 7/20... Training Step: 2833... Training loss: 1.1428... 0.1235 sec/batch\n", "Epoch: 7/20... Training Step: 2834... Training loss: 1.2648... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2835... Training loss: 1.2467... 0.1212 sec/batch\n", "Epoch: 7/20... Training Step: 2836... Training loss: 1.2900... 0.1243 sec/batch\n", "Epoch: 7/20... Training Step: 2837... Training loss: 1.2872... 0.1210 sec/batch\n", "Epoch: 7/20... Training Step: 2838... Training loss: 1.1544... 0.1192 sec/batch\n", "Epoch: 7/20... Training Step: 2839... Training loss: 1.1249... 0.1200 sec/batch\n", "Epoch: 7/20... 
Training Step: 2840... Training loss: 1.0838... 0.1202 sec/batch\n", "Epoch: 7/20... Training Step: 2841... Training loss: 1.2728... 0.1338 sec/batch\n", "Epoch: 7/20... Training Step: 2842... Training loss: 1.3078... 0.1325 sec/batch\n", "Epoch: 7/20... Training Step: 2843... Training loss: 1.0610... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 2844... Training loss: 1.0928... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 2845... Training loss: 1.1602... 0.1226 sec/batch\n", "Epoch: 7/20... Training Step: 2846... Training loss: 1.3309... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 2847... Training loss: 1.0787... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2848... Training loss: 1.1858... 0.1213 sec/batch\n", "Epoch: 7/20... Training Step: 2849... Training loss: 1.0879... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 2850... Training loss: 1.3595... 0.1214 sec/batch\n", "Epoch: 7/20... Training Step: 2851... Training loss: 1.1286... 0.1198 sec/batch\n", "Epoch: 7/20... Training Step: 2852... Training loss: 1.2659... 0.1208 sec/batch\n", "Epoch: 7/20... Training Step: 2853... Training loss: 1.2338... 0.1182 sec/batch\n", "Epoch: 7/20... Training Step: 2854... Training loss: 1.2420... 0.1199 sec/batch\n", "Epoch: 7/20... Training Step: 2855... Training loss: 1.3005... 0.1250 sec/batch\n", "Epoch: 7/20... Training Step: 2856... Training loss: 1.1923... 0.1210 sec/batch\n", "Epoch: 7/20... Training Step: 2857... Training loss: 1.1712... 0.1237 sec/batch\n", "Epoch: 7/20... Training Step: 2858... Training loss: 1.0615... 0.1242 sec/batch\n", "Epoch: 7/20... Training Step: 2859... Training loss: 1.3661... 0.1226 sec/batch\n", "Epoch: 7/20... Training Step: 2860... Training loss: 1.0779... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 2861... Training loss: 1.1600... 0.1213 sec/batch\n", "Epoch: 7/20... Training Step: 2862... Training loss: 1.2276... 0.1154 sec/batch\n", "Epoch: 7/20... Training Step: 2863... 
Training loss: 1.2054... 0.1197 sec/batch\n", "Epoch: 7/20... Training Step: 2864... Training loss: 1.1183... 0.1257 sec/batch\n", "Epoch: 7/20... Training Step: 2865... Training loss: 1.2551... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 2866... Training loss: 1.2596... 0.1187 sec/batch\n", "Epoch: 7/20... Training Step: 2867... Training loss: 1.0930... 0.1226 sec/batch\n", "Epoch: 7/20... Training Step: 2868... Training loss: 1.3685... 0.1224 sec/batch\n", "Epoch: 7/20... Training Step: 2869... Training loss: 1.2715... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2870... Training loss: 1.3676... 0.1234 sec/batch\n", "Epoch: 7/20... Training Step: 2871... Training loss: 1.1656... 0.1248 sec/batch\n", "Epoch: 7/20... Training Step: 2872... Training loss: 1.3329... 0.1228 sec/batch\n", "Epoch: 7/20... Training Step: 2873... Training loss: 1.4455... 0.1200 sec/batch\n", "Epoch: 7/20... Training Step: 2874... Training loss: 1.1862... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 2875... Training loss: 1.3555... 0.1234 sec/batch\n", "Epoch: 7/20... Training Step: 2876... Training loss: 1.3545... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2877... Training loss: 1.0862... 0.1242 sec/batch\n", "Epoch: 7/20... Training Step: 2878... Training loss: 1.4092... 0.1167 sec/batch\n", "Epoch: 7/20... Training Step: 2879... Training loss: 1.2123... 0.1235 sec/batch\n", "Epoch: 7/20... Training Step: 2880... Training loss: 1.1842... 0.1177 sec/batch\n", "Epoch: 7/20... Training Step: 2881... Training loss: 1.4710... 0.1194 sec/batch\n", "Epoch: 7/20... Training Step: 2882... Training loss: 1.2854... 0.1227 sec/batch\n", "Epoch: 7/20... Training Step: 2883... Training loss: 1.2302... 0.1188 sec/batch\n", "Epoch: 7/20... Training Step: 2884... Training loss: 1.1533... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 2885... Training loss: 1.3681... 0.1234 sec/batch\n", "Epoch: 7/20... Training Step: 2886... Training loss: 1.3707... 
0.1215 sec/batch\n", "Epoch: 7/20... Training Step: 2887... Training loss: 1.5041... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 2888... Training loss: 1.2210... 0.1269 sec/batch\n", "Epoch: 7/20... Training Step: 2889... Training loss: 1.3641... 0.1275 sec/batch\n", "Epoch: 7/20... Training Step: 2890... Training loss: 1.4395... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 2891... Training loss: 1.2698... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 2892... Training loss: 1.4226... 0.1224 sec/batch\n", "Epoch: 7/20... Training Step: 2893... Training loss: 1.2878... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 2894... Training loss: 1.2959... 0.1200 sec/batch\n", "Epoch: 7/20... Training Step: 2895... Training loss: 1.3491... 0.1204 sec/batch\n", "Epoch: 7/20... Training Step: 2896... Training loss: 1.1730... 0.1245 sec/batch\n", "Epoch: 7/20... Training Step: 2897... Training loss: 1.3237... 0.1280 sec/batch\n", "Epoch: 7/20... Training Step: 2898... Training loss: 1.3830... 0.1232 sec/batch\n", "Epoch: 7/20... Training Step: 2899... Training loss: 1.3956... 0.1281 sec/batch\n", "Epoch: 7/20... Training Step: 2900... Training loss: 1.2348... 0.1208 sec/batch\n", "Epoch: 7/20... Training Step: 2901... Training loss: 1.3487... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 2902... Training loss: 1.4461... 0.1190 sec/batch\n", "Epoch: 7/20... Training Step: 2903... Training loss: 1.2144... 0.1230 sec/batch\n", "Epoch: 7/20... Training Step: 2904... Training loss: 1.1711... 0.1243 sec/batch\n", "Epoch: 7/20... Training Step: 2905... Training loss: 1.2793... 0.1243 sec/batch\n", "Epoch: 7/20... Training Step: 2906... Training loss: 1.3035... 0.1234 sec/batch\n", "Epoch: 7/20... Training Step: 2907... Training loss: 1.3720... 0.1221 sec/batch\n", "Epoch: 7/20... Training Step: 2908... Training loss: 1.2514... 0.1269 sec/batch\n", "Epoch: 7/20... Training Step: 2909... Training loss: 1.3289... 0.1195 sec/batch\n", "Epoch: 7/20... 
Training Step: 2910... Training loss: 1.0861... 0.1220 sec/batch\n", "Epoch: 7/20... Training Step: 2911... Training loss: 1.2257... 0.1227 sec/batch\n", "Epoch: 7/20... Training Step: 2912... Training loss: 1.2460... 0.1220 sec/batch\n", "Epoch: 7/20... Training Step: 2913... Training loss: 1.3374... 0.1176 sec/batch\n", "Epoch: 7/20... Training Step: 2914... Training loss: 1.2274... 0.1227 sec/batch\n", "Epoch: 7/20... Training Step: 2915... Training loss: 1.5028... 0.1224 sec/batch\n", "Epoch: 7/20... Training Step: 2916... Training loss: 1.2695... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 2917... Training loss: 1.2075... 0.1258 sec/batch\n", "Epoch: 7/20... Training Step: 2918... Training loss: 1.4489... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 2919... Training loss: 1.2187... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2920... Training loss: 1.1162... 0.1191 sec/batch\n", "Epoch: 7/20... Training Step: 2921... Training loss: 1.1236... 0.1229 sec/batch\n", "Epoch: 7/20... Training Step: 2922... Training loss: 1.2346... 0.1249 sec/batch\n", "Epoch: 7/20... Training Step: 2923... Training loss: 1.1896... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 2924... Training loss: 1.3334... 0.1208 sec/batch\n", "Epoch: 7/20... Training Step: 2925... Training loss: 1.1305... 0.1225 sec/batch\n", "Epoch: 7/20... Training Step: 2926... Training loss: 1.2262... 0.1196 sec/batch\n", "Epoch: 7/20... Training Step: 2927... Training loss: 1.1787... 0.1202 sec/batch\n", "Epoch: 7/20... Training Step: 2928... Training loss: 1.2920... 0.1202 sec/batch\n", "Epoch: 7/20... Training Step: 2929... Training loss: 1.2056... 0.1204 sec/batch\n", "Epoch: 7/20... Training Step: 2930... Training loss: 1.1178... 0.1186 sec/batch\n", "Epoch: 7/20... Training Step: 2931... Training loss: 1.2777... 0.1210 sec/batch\n", "Epoch: 7/20... Training Step: 2932... Training loss: 1.1138... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2933... 
Training loss: 1.2942... 0.1212 sec/batch\n", "Epoch: 7/20... Training Step: 2934... Training loss: 1.4486... 0.1211 sec/batch\n", "Epoch: 7/20... Training Step: 2935... Training loss: 1.3656... 0.1198 sec/batch\n", "Epoch: 7/20... Training Step: 2936... Training loss: 1.4583... 0.1221 sec/batch\n", "Epoch: 7/20... Training Step: 2937... Training loss: 1.5172... 0.1205 sec/batch\n", "Epoch: 7/20... Training Step: 2938... Training loss: 1.3577... 0.1245 sec/batch\n", "Epoch: 7/20... Training Step: 2939... Training loss: 1.2891... 0.1251 sec/batch\n", "Epoch: 7/20... Training Step: 2940... Training loss: 1.1759... 0.1182 sec/batch\n", "Epoch: 7/20... Training Step: 2941... Training loss: 1.2062... 0.1200 sec/batch\n", "Epoch: 7/20... Training Step: 2942... Training loss: 1.1917... 0.1182 sec/batch\n", "Epoch: 7/20... Training Step: 2943... Training loss: 1.1627... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 2944... Training loss: 1.1959... 0.1187 sec/batch\n", "Epoch: 7/20... Training Step: 2945... Training loss: 1.3872... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 2946... Training loss: 1.2075... 0.1233 sec/batch\n", "Epoch: 7/20... Training Step: 2947... Training loss: 1.4168... 0.1262 sec/batch\n", "Epoch: 7/20... Training Step: 2948... Training loss: 1.0882... 0.1221 sec/batch\n", "Epoch: 7/20... Training Step: 2949... Training loss: 1.2932... 0.1227 sec/batch\n", "Epoch: 7/20... Training Step: 2950... Training loss: 1.2506... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 2951... Training loss: 1.1376... 0.1212 sec/batch\n", "Epoch: 7/20... Training Step: 2952... Training loss: 1.3625... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 2953... Training loss: 1.1834... 0.1280 sec/batch\n", "Epoch: 7/20... Training Step: 2954... Training loss: 1.2536... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 2955... Training loss: 1.3487... 0.1226 sec/batch\n", "Epoch: 7/20... Training Step: 2956... Training loss: 1.3959... 
0.1199 sec/batch\n", "Epoch: 7/20... Training Step: 2957... Training loss: 1.1147... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 2958... Training loss: 1.3603... 0.1252 sec/batch\n", "Epoch: 7/20... Training Step: 2959... Training loss: 1.2933... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 2960... Training loss: 1.1402... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 2961... Training loss: 1.1359... 0.1216 sec/batch\n", "Epoch: 7/20... Training Step: 2962... Training loss: 1.4074... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 2963... Training loss: 1.0790... 0.1220 sec/batch\n", "Epoch: 7/20... Training Step: 2964... Training loss: 1.3171... 0.1174 sec/batch\n", "Epoch: 7/20... Training Step: 2965... Training loss: 1.0411... 0.1194 sec/batch\n", "Epoch: 7/20... Training Step: 2966... Training loss: 1.3761... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 2967... Training loss: 1.3227... 0.1253 sec/batch\n", "Epoch: 7/20... Training Step: 2968... Training loss: 1.1409... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 2969... Training loss: 1.4530... 0.1249 sec/batch\n", "Epoch: 7/20... Training Step: 2970... Training loss: 1.2685... 0.1342 sec/batch\n", "Epoch: 7/20... Training Step: 2971... Training loss: 1.3722... 0.1291 sec/batch\n", "Epoch: 7/20... Training Step: 2972... Training loss: 1.0738... 0.1183 sec/batch\n", "Epoch: 7/20... Training Step: 2973... Training loss: 1.3365... 0.1143 sec/batch\n", "Epoch: 7/20... Training Step: 2974... Training loss: 1.2478... 0.1185 sec/batch\n", "Epoch: 7/20... Training Step: 2975... Training loss: 1.2580... 0.1272 sec/batch\n", "Epoch: 7/20... Training Step: 2976... Training loss: 1.4199... 0.1286 sec/batch\n", "Epoch: 7/20... Training Step: 2977... Training loss: 1.2238... 0.1252 sec/batch\n", "Epoch: 7/20... Training Step: 2978... Training loss: 1.1797... 0.1318 sec/batch\n", "Epoch: 7/20... Training Step: 2979... Training loss: 1.2900... 0.1397 sec/batch\n", "Epoch: 7/20... 
Training Step: 2980... Training loss: 1.3102... 0.1459 sec/batch\n", "Epoch: 7/20... Training Step: 2981... Training loss: 1.1616... 0.1313 sec/batch\n", "Epoch: 7/20... Training Step: 2982... Training loss: 1.3561... 0.1290 sec/batch\n", "Epoch: 7/20... Training Step: 2983... Training loss: 0.9945... 0.1355 sec/batch\n", "Epoch: 7/20... Training Step: 2984... Training loss: 1.1149... 0.1252 sec/batch\n", "Epoch: 7/20... Training Step: 2985... Training loss: 1.1706... 0.1185 sec/batch\n", "Epoch: 7/20... Training Step: 2986... Training loss: 1.2888... 0.1164 sec/batch\n", "Epoch: 7/20... Training Step: 2987... Training loss: 1.2587... 0.1146 sec/batch\n", "Epoch: 7/20... Training Step: 2988... Training loss: 1.3948... 0.1234 sec/batch\n", "Epoch: 7/20... Training Step: 2989... Training loss: 1.0784... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 2990... Training loss: 1.1056... 0.1214 sec/batch\n", "Epoch: 7/20... Training Step: 2991... Training loss: 1.1471... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 2992... Training loss: 1.3841... 0.1199 sec/batch\n", "Epoch: 7/20... Training Step: 2993... Training loss: 1.2974... 0.1326 sec/batch\n", "Epoch: 7/20... Training Step: 2994... Training loss: 1.1223... 0.1776 sec/batch\n", "Epoch: 7/20... Training Step: 2995... Training loss: 1.1056... 0.1571 sec/batch\n", "Epoch: 7/20... Training Step: 2996... Training loss: 1.3590... 0.1420 sec/batch\n", "Epoch: 7/20... Training Step: 2997... Training loss: 1.3812... 0.1396 sec/batch\n", "Epoch: 7/20... Training Step: 2998... Training loss: 1.2572... 0.1149 sec/batch\n", "Epoch: 7/20... Training Step: 2999... Training loss: 1.3649... 0.1176 sec/batch\n", "Epoch: 7/20... Training Step: 3000... Training loss: 1.1410... 0.1182 sec/batch\n", "Epoch: 7/20... Training Step: 3001... Training loss: 1.3187... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 3002... Training loss: 1.2409... 0.1212 sec/batch\n", "Epoch: 7/20... Training Step: 3003... 
Training loss: 1.3818... 0.1233 sec/batch\n", "Epoch: 7/20... Training Step: 3004... Training loss: 1.2768... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 3005... Training loss: 1.1818... 0.1230 sec/batch\n", "Epoch: 7/20... Training Step: 3006... Training loss: 1.5467... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 3007... Training loss: 1.3526... 0.1227 sec/batch\n", "Epoch: 7/20... Training Step: 3008... Training loss: 1.5588... 0.1249 sec/batch\n", "Epoch: 7/20... Training Step: 3009... Training loss: 1.2423... 0.1187 sec/batch\n", "Epoch: 7/20... Training Step: 3010... Training loss: 1.3806... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 3011... Training loss: 1.3839... 0.1304 sec/batch\n", "Epoch: 7/20... Training Step: 3012... Training loss: 1.2262... 0.1309 sec/batch\n", "Epoch: 7/20... Training Step: 3013... Training loss: 1.2710... 0.1349 sec/batch\n", "Epoch: 7/20... Training Step: 3014... Training loss: 1.2469... 0.1354 sec/batch\n", "Epoch: 7/20... Training Step: 3015... Training loss: 1.2859... 0.1282 sec/batch\n", "Epoch: 7/20... Training Step: 3016... Training loss: 1.2387... 0.1272 sec/batch\n", "Epoch: 7/20... Training Step: 3017... Training loss: 1.5192... 0.1197 sec/batch\n", "Epoch: 7/20... Training Step: 3018... Training loss: 1.2299... 0.1232 sec/batch\n", "Epoch: 7/20... Training Step: 3019... Training loss: 1.4920... 0.1184 sec/batch\n", "Epoch: 7/20... Training Step: 3020... Training loss: 1.2855... 0.1184 sec/batch\n", "Epoch: 7/20... Training Step: 3021... Training loss: 1.5329... 0.1233 sec/batch\n", "Epoch: 7/20... Training Step: 3022... Training loss: 1.0556... 0.1228 sec/batch\n", "Epoch: 7/20... Training Step: 3023... Training loss: 1.2808... 0.1174 sec/batch\n", "Epoch: 7/20... Training Step: 3024... Training loss: 1.3961... 0.1226 sec/batch\n", "Epoch: 7/20... Training Step: 3025... Training loss: 1.1147... 0.1225 sec/batch\n", "Epoch: 7/20... Training Step: 3026... Training loss: 1.1854... 
0.1183 sec/batch\n", "Epoch: 7/20... Training Step: 3027... Training loss: 1.3955... 0.1187 sec/batch\n", "Epoch: 7/20... Training Step: 3028... Training loss: 1.3307... 0.1233 sec/batch\n", "Epoch: 7/20... Training Step: 3029... Training loss: 1.3542... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 3030... Training loss: 1.1325... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 3031... Training loss: 1.1863... 0.1202 sec/batch\n", "Epoch: 7/20... Training Step: 3032... Training loss: 1.4447... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 3033... Training loss: 1.2066... 0.1218 sec/batch\n", "Epoch: 7/20... Training Step: 3034... Training loss: 1.3334... 0.1214 sec/batch\n", "Epoch: 7/20... Training Step: 3035... Training loss: 1.4566... 0.1270 sec/batch\n", "Epoch: 7/20... Training Step: 3036... Training loss: 1.3020... 0.1329 sec/batch\n", "Epoch: 7/20... Training Step: 3037... Training loss: 1.1827... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 3038... Training loss: 1.2752... 0.1216 sec/batch\n", "Epoch: 7/20... Training Step: 3039... Training loss: 1.2711... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 3040... Training loss: 1.3104... 0.1230 sec/batch\n", "Epoch: 7/20... Training Step: 3041... Training loss: 1.3659... 0.1229 sec/batch\n", "Epoch: 7/20... Training Step: 3042... Training loss: 1.1547... 0.1210 sec/batch\n", "Epoch: 7/20... Training Step: 3043... Training loss: 1.1271... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 3044... Training loss: 1.2393... 0.1237 sec/batch\n", "Epoch: 7/20... Training Step: 3045... Training loss: 1.3038... 0.1250 sec/batch\n", "Epoch: 7/20... Training Step: 3046... Training loss: 1.4131... 0.1187 sec/batch\n", "Epoch: 7/20... Training Step: 3047... Training loss: 1.1953... 0.1241 sec/batch\n", "Epoch: 7/20... Training Step: 3048... Training loss: 1.3392... 0.1192 sec/batch\n", "Epoch: 7/20... Training Step: 3049... Training loss: 1.3827... 0.1146 sec/batch\n", "Epoch: 7/20... 
Training Step: 3050... Training loss: 1.3188... 0.1237 sec/batch\n", "Epoch: 7/20... Training Step: 3051... Training loss: 1.3933... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 3052... Training loss: 1.4441... 0.1195 sec/batch\n", "Epoch: 7/20... Training Step: 3053... Training loss: 1.2725... 0.1225 sec/batch\n", "Epoch: 7/20... Training Step: 3054... Training loss: 1.4141... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 3055... Training loss: 1.2908... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 3056... Training loss: 1.4566... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 3057... Training loss: 1.3350... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 3058... Training loss: 1.2420... 0.1215 sec/batch\n", "Epoch: 7/20... Training Step: 3059... Training loss: 1.3743... 0.1237 sec/batch\n", "Epoch: 7/20... Training Step: 3060... Training loss: 1.2568... 0.1223 sec/batch\n", "Epoch: 7/20... Training Step: 3061... Training loss: 1.2991... 0.1196 sec/batch\n", "Epoch: 7/20... Training Step: 3062... Training loss: 1.4898... 0.1215 sec/batch\n", "Epoch: 7/20... Training Step: 3063... Training loss: 1.2120... 0.1263 sec/batch\n", "Epoch: 7/20... Training Step: 3064... Training loss: 1.2244... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 3065... Training loss: 1.2063... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 3066... Training loss: 1.1842... 0.1177 sec/batch\n", "Epoch: 7/20... Training Step: 3067... Training loss: 1.2801... 0.1202 sec/batch\n", "Epoch: 7/20... Training Step: 3068... Training loss: 1.2744... 0.1237 sec/batch\n", "Epoch: 7/20... Training Step: 3069... Training loss: 1.1272... 0.1256 sec/batch\n", "Epoch: 7/20... Training Step: 3070... Training loss: 1.2795... 0.1220 sec/batch\n", "Epoch: 7/20... Training Step: 3071... Training loss: 1.3246... 0.1204 sec/batch\n", "Epoch: 7/20... Training Step: 3072... Training loss: 1.2986... 0.1253 sec/batch\n", "Epoch: 7/20... Training Step: 3073... 
Training loss: 1.3096... 0.1177 sec/batch\n", "Epoch: 7/20... Training Step: 3074... Training loss: 1.2854... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 3075... Training loss: 1.3599... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 3076... Training loss: 1.1808... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 3077... Training loss: 1.1529... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 3078... Training loss: 1.2824... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 3079... Training loss: 1.2650... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 3080... Training loss: 1.4909... 0.1239 sec/batch\n", "Epoch: 7/20... Training Step: 3081... Training loss: 1.2227... 0.1255 sec/batch\n", "Epoch: 7/20... Training Step: 3082... Training loss: 1.2138... 0.1177 sec/batch\n", "Epoch: 7/20... Training Step: 3083... Training loss: 1.2459... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 3084... Training loss: 1.1699... 0.1243 sec/batch\n", "Epoch: 7/20... Training Step: 3085... Training loss: 1.2717... 0.1200 sec/batch\n", "Epoch: 7/20... Training Step: 3086... Training loss: 1.2582... 0.1228 sec/batch\n", "Epoch: 7/20... Training Step: 3087... Training loss: 1.1604... 0.1242 sec/batch\n", "Epoch: 7/20... Training Step: 3088... Training loss: 1.4051... 0.1232 sec/batch\n", "Epoch: 7/20... Training Step: 3089... Training loss: 1.1313... 0.1190 sec/batch\n", "Epoch: 7/20... Training Step: 3090... Training loss: 1.2458... 0.1222 sec/batch\n", "Epoch: 7/20... Training Step: 3091... Training loss: 1.2898... 0.1258 sec/batch\n", "Epoch: 7/20... Training Step: 3092... Training loss: 1.6273... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 3093... Training loss: 1.3822... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 3094... Training loss: 1.2632... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 3095... Training loss: 1.2501... 0.1200 sec/batch\n", "Epoch: 7/20... Training Step: 3096... Training loss: 1.1641... 
0.1200 sec/batch\n", "Epoch: 7/20... Training Step: 3097... Training loss: 1.2217... 0.1224 sec/batch\n", "Epoch: 7/20... Training Step: 3098... Training loss: 1.1319... 0.1251 sec/batch\n", "Epoch: 7/20... Training Step: 3099... Training loss: 0.9536... 0.1206 sec/batch\n", "Epoch: 7/20... Training Step: 3100... Training loss: 1.0892... 0.1232 sec/batch\n", "Epoch: 7/20... Training Step: 3101... Training loss: 1.1958... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 3102... Training loss: 1.2396... 0.1185 sec/batch\n", "Epoch: 7/20... Training Step: 3103... Training loss: 1.2405... 0.1199 sec/batch\n", "Epoch: 7/20... Training Step: 3104... Training loss: 1.0964... 0.1159 sec/batch\n", "Epoch: 7/20... Training Step: 3105... Training loss: 1.1538... 0.1190 sec/batch\n", "Epoch: 7/20... Training Step: 3106... Training loss: 1.3817... 0.1235 sec/batch\n", "Epoch: 7/20... Training Step: 3107... Training loss: 1.0997... 0.1244 sec/batch\n", "Epoch: 7/20... Training Step: 3108... Training loss: 1.0852... 0.1218 sec/batch\n", "Epoch: 7/20... Training Step: 3109... Training loss: 1.1240... 0.1254 sec/batch\n", "Epoch: 7/20... Training Step: 3110... Training loss: 0.9437... 0.1216 sec/batch\n", "Epoch: 7/20... Training Step: 3111... Training loss: 1.2613... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 3112... Training loss: 1.1648... 0.1221 sec/batch\n", "Epoch: 7/20... Training Step: 3113... Training loss: 1.3288... 0.1311 sec/batch\n", "Epoch: 7/20... Training Step: 3114... Training loss: 1.1001... 0.1253 sec/batch\n", "Epoch: 7/20... Training Step: 3115... Training loss: 1.2499... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 3116... Training loss: 1.2215... 0.1233 sec/batch\n", "Epoch: 7/20... Training Step: 3117... Training loss: 1.0433... 0.1164 sec/batch\n", "Epoch: 7/20... Training Step: 3118... Training loss: 1.2437... 0.1248 sec/batch\n", "Epoch: 7/20... Training Step: 3119... Training loss: 1.2448... 0.1195 sec/batch\n", "Epoch: 7/20... 
Training Step: 3120... Training loss: 1.1497... 0.1224 sec/batch\n", "Epoch: 7/20... Training Step: 3121... Training loss: 1.2094... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 3122... Training loss: 1.0038... 0.1168 sec/batch\n", "Epoch: 7/20... Training Step: 3123... Training loss: 1.2565... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 3124... Training loss: 1.1723... 0.1196 sec/batch\n", "Epoch: 7/20... Training Step: 3125... Training loss: 1.1583... 0.1232 sec/batch\n", "Epoch: 7/20... Training Step: 3126... Training loss: 1.3479... 0.1240 sec/batch\n", "Epoch: 7/20... Training Step: 3127... Training loss: 1.0287... 0.1221 sec/batch\n", "Epoch: 7/20... Training Step: 3128... Training loss: 1.4796... 0.1188 sec/batch\n", "Epoch: 7/20... Training Step: 3129... Training loss: 1.1575... 0.1238 sec/batch\n", "Epoch: 7/20... Training Step: 3130... Training loss: 1.0189... 0.1274 sec/batch\n", "Epoch: 7/20... Training Step: 3131... Training loss: 1.0379... 0.1246 sec/batch\n", "Epoch: 7/20... Training Step: 3132... Training loss: 1.5389... 0.1242 sec/batch\n", "Epoch: 7/20... Training Step: 3133... Training loss: 1.1431... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 3134... Training loss: 1.3206... 0.1172 sec/batch\n", "Epoch: 7/20... Training Step: 3135... Training loss: 1.1721... 0.1214 sec/batch\n", "Epoch: 7/20... Training Step: 3136... Training loss: 1.1851... 0.1199 sec/batch\n", "Epoch: 7/20... Training Step: 3137... Training loss: 1.0153... 0.1205 sec/batch\n", "Epoch: 7/20... Training Step: 3138... Training loss: 0.9928... 0.1217 sec/batch\n", "Epoch: 7/20... Training Step: 3139... Training loss: 1.2778... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 3140... Training loss: 1.2535... 0.1228 sec/batch\n", "Epoch: 7/20... Training Step: 3141... Training loss: 1.0508... 0.1195 sec/batch\n", "Epoch: 7/20... Training Step: 3142... Training loss: 1.2995... 0.1248 sec/batch\n", "Epoch: 7/20... Training Step: 3143... 
Training loss: 1.4199... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 3144... Training loss: 1.0698... 0.1199 sec/batch\n", "Epoch: 7/20... Training Step: 3145... Training loss: 1.3312... 0.1240 sec/batch\n", "Epoch: 7/20... Training Step: 3146... Training loss: 1.1414... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 3147... Training loss: 1.1476... 0.1207 sec/batch\n", "Epoch: 7/20... Training Step: 3148... Training loss: 1.1760... 0.1192 sec/batch\n", "Epoch: 7/20... Training Step: 3149... Training loss: 1.2606... 0.1268 sec/batch\n", "Epoch: 7/20... Training Step: 3150... Training loss: 1.2919... 0.1197 sec/batch\n", "Epoch: 7/20... Training Step: 3151... Training loss: 1.1448... 0.1192 sec/batch\n", "Epoch: 7/20... Training Step: 3152... Training loss: 1.3525... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 3153... Training loss: 1.3536... 0.1246 sec/batch\n", "Epoch: 7/20... Training Step: 3154... Training loss: 1.2643... 0.1255 sec/batch\n", "Epoch: 7/20... Training Step: 3155... Training loss: 1.1279... 0.1259 sec/batch\n", "Epoch: 7/20... Training Step: 3156... Training loss: 1.1800... 0.1216 sec/batch\n", "Epoch: 7/20... Training Step: 3157... Training loss: 1.0797... 0.1177 sec/batch\n", "Epoch: 7/20... Training Step: 3158... Training loss: 1.3322... 0.1192 sec/batch\n", "Epoch: 7/20... Training Step: 3159... Training loss: 1.2452... 0.1296 sec/batch\n", "Epoch: 7/20... Training Step: 3160... Training loss: 1.2592... 0.1154 sec/batch\n", "Epoch: 7/20... Training Step: 3161... Training loss: 1.3449... 0.1227 sec/batch\n", "Epoch: 7/20... Training Step: 3162... Training loss: 1.3945... 0.1267 sec/batch\n", "Epoch: 7/20... Training Step: 3163... Training loss: 1.1524... 0.1232 sec/batch\n", "Epoch: 7/20... Training Step: 3164... Training loss: 1.2772... 0.1217 sec/batch\n", "Epoch: 7/20... Training Step: 3165... Training loss: 1.1202... 0.1247 sec/batch\n", "Epoch: 7/20... Training Step: 3166... Training loss: 1.1862... 
0.1196 sec/batch\n", "Epoch: 7/20... Training Step: 3167... Training loss: 1.1262... 0.1253 sec/batch\n", "Epoch: 7/20... Training Step: 3168... Training loss: 1.2611... 0.1214 sec/batch\n", "Epoch: 7/20... Training Step: 3169... Training loss: 1.1657... 0.1249 sec/batch\n", "Epoch: 7/20... Training Step: 3170... Training loss: 1.2976... 0.1234 sec/batch\n", "Epoch: 7/20... Training Step: 3171... Training loss: 1.2226... 0.1234 sec/batch\n", "Epoch: 7/20... Training Step: 3172... Training loss: 1.0814... 0.1241 sec/batch\n", "Epoch: 7/20... Training Step: 3173... Training loss: 1.1090... 0.1260 sec/batch\n", "Epoch: 7/20... Training Step: 3174... Training loss: 1.2359... 0.1218 sec/batch\n", "Epoch: 7/20... Training Step: 3175... Training loss: 1.1152... 0.1235 sec/batch\n", "Epoch: 7/20... Training Step: 3176... Training loss: 1.0353... 0.1213 sec/batch\n", "Epoch: 7/20... Training Step: 3177... Training loss: 1.1522... 0.1173 sec/batch\n", "Epoch: 7/20... Training Step: 3178... Training loss: 1.1130... 0.1174 sec/batch\n", "Epoch: 7/20... Training Step: 3179... Training loss: 1.1318... 0.1260 sec/batch\n", "Epoch: 7/20... Training Step: 3180... Training loss: 1.4845... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 3181... Training loss: 1.2188... 0.1237 sec/batch\n", "Epoch: 7/20... Training Step: 3182... Training loss: 1.1568... 0.1211 sec/batch\n", "Epoch: 7/20... Training Step: 3183... Training loss: 1.2438... 0.1191 sec/batch\n", "Epoch: 7/20... Training Step: 3184... Training loss: 1.0755... 0.1173 sec/batch\n", "Epoch: 7/20... Training Step: 3185... Training loss: 1.1461... 0.1219 sec/batch\n", "Epoch: 7/20... Training Step: 3186... Training loss: 1.1495... 0.1245 sec/batch\n", "Epoch: 7/20... Training Step: 3187... Training loss: 1.0971... 0.1208 sec/batch\n", "Epoch: 7/20... Training Step: 3188... Training loss: 1.2161... 0.1212 sec/batch\n", "Epoch: 7/20... Training Step: 3189... Training loss: 1.3190... 0.1212 sec/batch\n", "Epoch: 7/20... 
Training Step: 3190... Training loss: 1.4433... 0.1270 sec/batch\n", "Epoch: 7/20... Training Step: 3191... Training loss: 1.1874... 0.1199 sec/batch\n", "Epoch: 7/20... Training Step: 3192... Training loss: 1.5070... 0.1215 sec/batch\n", "Epoch: 7/20... Training Step: 3193... Training loss: 1.3069... 0.1187 sec/batch\n", "Epoch: 7/20... Training Step: 3194... Training loss: 1.2269... 0.1218 sec/batch\n", "Epoch: 7/20... Training Step: 3195... Training loss: 1.1359... 0.1214 sec/batch\n", "Epoch: 7/20... Training Step: 3196... Training loss: 1.1190... 0.1262 sec/batch\n", "Epoch: 7/20... Training Step: 3197... Training loss: 1.3502... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 3198... Training loss: 1.2827... 0.1229 sec/batch\n", "Epoch: 7/20... Training Step: 3199... Training loss: 1.3563... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 3200... Training loss: 1.4287... 0.1217 sec/batch\n", "Epoch: 7/20... Training Step: 3201... Training loss: 1.3330... 0.1253 sec/batch\n", "Epoch: 7/20... Training Step: 3202... Training loss: 1.1027... 0.1203 sec/batch\n", "Epoch: 7/20... Training Step: 3203... Training loss: 1.1840... 0.1211 sec/batch\n", "Epoch: 7/20... Training Step: 3204... Training loss: 1.0667... 0.1236 sec/batch\n", "Epoch: 7/20... Training Step: 3205... Training loss: 1.2822... 0.1183 sec/batch\n", "Epoch: 7/20... Training Step: 3206... Training loss: 1.2431... 0.1190 sec/batch\n", "Epoch: 7/20... Training Step: 3207... Training loss: 1.2613... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 3208... Training loss: 1.4000... 0.1229 sec/batch\n", "Epoch: 7/20... Training Step: 3209... Training loss: 1.1610... 0.1239 sec/batch\n", "Epoch: 7/20... Training Step: 3210... Training loss: 1.2771... 0.1214 sec/batch\n", "Epoch: 7/20... Training Step: 3211... Training loss: 1.2774... 0.1231 sec/batch\n", "Epoch: 7/20... Training Step: 3212... Training loss: 1.3404... 0.1235 sec/batch\n", "Epoch: 7/20... Training Step: 3213... 
Training loss: 1.2024... 0.1273 sec/batch\n", "Epoch: 7/20... Training Step: 3214... Training loss: 1.2484... 0.1248 sec/batch\n", "Epoch: 7/20... Training Step: 3215... Training loss: 1.5099... 0.1187 sec/batch\n", "Epoch: 7/20... Training Step: 3216... Training loss: 1.2710... 0.1131 sec/batch\n", "Epoch: 7/20... Training Step: 3217... Training loss: 1.3640... 0.1195 sec/batch\n", "Epoch: 7/20... Training Step: 3218... Training loss: 1.2738... 0.1110 sec/batch\n", "Epoch: 7/20... Training Step: 3219... Training loss: 1.1394... 0.1197 sec/batch\n", "Epoch: 7/20... Training Step: 3220... Training loss: 1.3137... 0.1170 sec/batch\n", "Epoch: 7/20... Training Step: 3221... Training loss: 1.1610... 0.1235 sec/batch\n", "Epoch: 7/20... Training Step: 3222... Training loss: 1.3274... 0.1193 sec/batch\n", "Epoch: 7/20... Training Step: 3223... Training loss: 1.4552... 0.1162 sec/batch\n", "Epoch: 7/20... Training Step: 3224... Training loss: 1.5764... 0.1195 sec/batch\n", "Epoch: 7/20... Training Step: 3225... Training loss: 1.2216... 0.1169 sec/batch\n", "Epoch: 7/20... Training Step: 3226... Training loss: 1.2733... 0.1151 sec/batch\n", "Epoch: 7/20... Training Step: 3227... Training loss: 1.3239... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 3228... Training loss: 1.2744... 0.1209 sec/batch\n", "Epoch: 7/20... Training Step: 3229... Training loss: 1.2496... 0.1150 sec/batch\n", "Epoch: 7/20... Training Step: 3230... Training loss: 1.1760... 0.1186 sec/batch\n", "Epoch: 7/20... Training Step: 3231... Training loss: 1.2811... 0.1225 sec/batch\n", "Epoch: 7/20... Training Step: 3232... Training loss: 1.1643... 0.1162 sec/batch\n", "Epoch: 7/20... Training Step: 3233... Training loss: 1.3005... 0.1253 sec/batch\n", "Epoch: 7/20... Training Step: 3234... Training loss: 1.2522... 0.1355 sec/batch\n", "Epoch: 7/20... Training Step: 3235... Training loss: 1.1556... 0.1162 sec/batch\n", "Epoch: 7/20... Training Step: 3236... Training loss: 1.3389... 
0.1157 sec/batch\n", "Epoch: 7/20... Training Step: 3237... Training loss: 1.0780... 0.1189 sec/batch\n", "Epoch: 7/20... Training Step: 3238... Training loss: 1.6944... 0.1183 sec/batch\n", "Epoch: 7/20... Training Step: 3239... Training loss: 1.3899... 0.1157 sec/batch\n", "Epoch: 7/20... Training Step: 3240... Training loss: 1.0673... 0.1183 sec/batch\n", "Epoch: 7/20... Training Step: 3241... Training loss: 1.1741... 0.1136 sec/batch\n", "Epoch: 7/20... Training Step: 3242... Training loss: 0.9654... 0.1160 sec/batch\n", "Epoch: 7/20... Training Step: 3243... Training loss: 1.1513... 0.1128 sec/batch\n", "Epoch: 7/20... Training Step: 3244... Training loss: 1.3113... 0.1196 sec/batch\n", "Epoch: 7/20... Training Step: 3245... Training loss: 1.2757... 0.1146 sec/batch\n", "Epoch: 7/20... Training Step: 3246... Training loss: 1.1444... 0.1208 sec/batch\n", "Epoch: 7/20... Training Step: 3247... Training loss: 1.3500... 0.1201 sec/batch\n", "Epoch: 7/20... Training Step: 3248... Training loss: 1.1983... 0.1170 sec/batch\n", "Epoch: 8/20... Training Step: 3249... Training loss: 1.5048... 0.1224 sec/batch\n", "Epoch: 8/20... Training Step: 3250... Training loss: 1.3285... 0.1195 sec/batch\n", "Epoch: 8/20... Training Step: 3251... Training loss: 1.2784... 0.1151 sec/batch\n", "Epoch: 8/20... Training Step: 3252... Training loss: 1.1725... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3253... Training loss: 1.2325... 0.1186 sec/batch\n", "Epoch: 8/20... Training Step: 3254... Training loss: 1.0584... 0.1219 sec/batch\n", "Epoch: 8/20... Training Step: 3255... Training loss: 1.3989... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3256... Training loss: 1.0949... 0.1180 sec/batch\n", "Epoch: 8/20... Training Step: 3257... Training loss: 1.0393... 0.1195 sec/batch\n", "Epoch: 8/20... Training Step: 3258... Training loss: 1.2853... 0.1170 sec/batch\n", "Epoch: 8/20... Training Step: 3259... Training loss: 1.1439... 0.1178 sec/batch\n", "Epoch: 8/20... 
Training Step: 3260... Training loss: 1.0540... 0.1162 sec/batch\n", "Epoch: 8/20... Training Step: 3261... Training loss: 1.4176... 0.1275 sec/batch\n", "Epoch: 8/20... Training Step: 3262... Training loss: 1.0036... 0.1250 sec/batch\n", "Epoch: 8/20... Training Step: 3263... Training loss: 1.2504... 0.1215 sec/batch\n", "Epoch: 8/20... Training Step: 3264... Training loss: 1.4186... 0.1147 sec/batch\n", "Epoch: 8/20... Training Step: 3265... Training loss: 1.1110... 0.1210 sec/batch\n", "Epoch: 8/20... Training Step: 3266... Training loss: 1.1113... 0.1197 sec/batch\n", "Epoch: 8/20... Training Step: 3267... Training loss: 1.2669... 0.1169 sec/batch\n", "Epoch: 8/20... Training Step: 3268... Training loss: 1.0783... 0.1187 sec/batch\n", "Epoch: 8/20... Training Step: 3269... Training loss: 1.3684... 0.1147 sec/batch\n", "Epoch: 8/20... Training Step: 3270... Training loss: 1.1295... 0.1194 sec/batch\n", "Epoch: 8/20... Training Step: 3271... Training loss: 1.3864... 0.1181 sec/batch\n", "Epoch: 8/20... Training Step: 3272... Training loss: 1.2161... 0.1219 sec/batch\n", "Epoch: 8/20... Training Step: 3273... Training loss: 1.1874... 0.1178 sec/batch\n", "Epoch: 8/20... Training Step: 3274... Training loss: 1.2300... 0.1273 sec/batch\n", "Epoch: 8/20... Training Step: 3275... Training loss: 1.2638... 0.1222 sec/batch\n", "Epoch: 8/20... Training Step: 3276... Training loss: 1.1008... 0.1295 sec/batch\n", "Epoch: 8/20... Training Step: 3277... Training loss: 1.0863... 0.1225 sec/batch\n", "Epoch: 8/20... Training Step: 3278... Training loss: 1.2468... 0.1249 sec/batch\n", "Epoch: 8/20... Training Step: 3279... Training loss: 1.0180... 0.1236 sec/batch\n", "Epoch: 8/20... Training Step: 3280... Training loss: 1.0786... 0.1234 sec/batch\n", "Epoch: 8/20... Training Step: 3281... Training loss: 1.0152... 0.1214 sec/batch\n", "Epoch: 8/20... Training Step: 3282... Training loss: 1.0477... 0.1289 sec/batch\n", "Epoch: 8/20... Training Step: 3283... 
Training loss: 1.0578... 0.1253 sec/batch\n", "Epoch: 8/20... Training Step: 3284... Training loss: 1.2052... 0.1162 sec/batch\n", "Epoch: 8/20... Training Step: 3285... Training loss: 1.2091... 0.1221 sec/batch\n", "Epoch: 8/20... Training Step: 3286... Training loss: 1.0305... 0.1235 sec/batch\n", "Epoch: 8/20... Training Step: 3287... Training loss: 1.1228... 0.1342 sec/batch\n", "Epoch: 8/20... Training Step: 3288... Training loss: 1.4156... 0.1279 sec/batch\n", "Epoch: 8/20... Training Step: 3289... Training loss: 1.1858... 0.1318 sec/batch\n", "Epoch: 8/20... Training Step: 3290... Training loss: 1.1119... 0.1210 sec/batch\n", "Epoch: 8/20... Training Step: 3291... Training loss: 1.2720... 0.1224 sec/batch\n", "Epoch: 8/20... Training Step: 3292... Training loss: 1.0342... 0.1176 sec/batch\n", "Epoch: 8/20... Training Step: 3293... Training loss: 1.0997... 0.1191 sec/batch\n", "Epoch: 8/20... Training Step: 3294... Training loss: 1.1066... 0.1188 sec/batch\n", "Epoch: 8/20... Training Step: 3295... Training loss: 1.1851... 0.1209 sec/batch\n", "Epoch: 8/20... Training Step: 3296... Training loss: 1.1577... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3297... Training loss: 1.1152... 0.1170 sec/batch\n", "Epoch: 8/20... Training Step: 3298... Training loss: 1.2337... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3299... Training loss: 1.1009... 0.1216 sec/batch\n", "Epoch: 8/20... Training Step: 3300... Training loss: 1.2324... 0.1157 sec/batch\n", "Epoch: 8/20... Training Step: 3301... Training loss: 1.1964... 0.1212 sec/batch\n", "Epoch: 8/20... Training Step: 3302... Training loss: 1.2295... 0.1203 sec/batch\n", "Epoch: 8/20... Training Step: 3303... Training loss: 0.9775... 0.1203 sec/batch\n", "Epoch: 8/20... Training Step: 3304... Training loss: 1.0413... 0.1185 sec/batch\n", "Epoch: 8/20... Training Step: 3305... Training loss: 1.1191... 0.1181 sec/batch\n", "Epoch: 8/20... Training Step: 3306... Training loss: 1.1647... 
0.1211 sec/batch\n", "Epoch: 8/20... Training Step: 3307... Training loss: 0.9222... 0.1186 sec/batch\n", "Epoch: 8/20... Training Step: 3308... Training loss: 1.1429... 0.1202 sec/batch\n", "Epoch: 8/20... Training Step: 3309... Training loss: 1.1077... 0.1197 sec/batch\n", "Epoch: 8/20... Training Step: 3310... Training loss: 1.3185... 0.1227 sec/batch\n", "Epoch: 8/20... Training Step: 3311... Training loss: 1.0749... 0.1178 sec/batch\n", "Epoch: 8/20... Training Step: 3312... Training loss: 1.1811... 0.1203 sec/batch\n", "Epoch: 8/20... Training Step: 3313... Training loss: 1.0664... 0.1171 sec/batch\n", "Epoch: 8/20... Training Step: 3314... Training loss: 1.3363... 0.1219 sec/batch\n", "Epoch: 8/20... Training Step: 3315... Training loss: 1.1278... 0.1188 sec/batch\n", "Epoch: 8/20... Training Step: 3316... Training loss: 1.2457... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3317... Training loss: 1.1585... 0.1195 sec/batch\n", "Epoch: 8/20... Training Step: 3318... Training loss: 1.1996... 0.1180 sec/batch\n", "Epoch: 8/20... Training Step: 3319... Training loss: 1.2517... 0.1208 sec/batch\n", "Epoch: 8/20... Training Step: 3320... Training loss: 1.0945... 0.1189 sec/batch\n", "Epoch: 8/20... Training Step: 3321... Training loss: 1.2217... 0.1149 sec/batch\n", "Epoch: 8/20... Training Step: 3322... Training loss: 1.0205... 0.1185 sec/batch\n", "Epoch: 8/20... Training Step: 3323... Training loss: 1.5043... 0.1173 sec/batch\n", "Epoch: 8/20... Training Step: 3324... Training loss: 1.0886... 0.1203 sec/batch\n", "Epoch: 8/20... Training Step: 3325... Training loss: 1.1459... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3326... Training loss: 1.2019... 0.1134 sec/batch\n", "Epoch: 8/20... Training Step: 3327... Training loss: 1.2495... 0.1191 sec/batch\n", "Epoch: 8/20... Training Step: 3328... Training loss: 1.1023... 0.1201 sec/batch\n", "Epoch: 8/20... Training Step: 3329... Training loss: 1.3687... 0.1214 sec/batch\n", "Epoch: 8/20... 
Training Step: 3330... Training loss: 1.2268... 0.1159 sec/batch\n", "Epoch: 8/20... Training Step: 3331... Training loss: 1.0678... 0.1138 sec/batch\n", "Epoch: 8/20... Training Step: 3332... Training loss: 1.3630... 0.1199 sec/batch\n", "Epoch: 8/20... Training Step: 3333... Training loss: 1.2492... 0.1207 sec/batch\n", "Epoch: 8/20... Training Step: 3334... Training loss: 1.2818... 0.1146 sec/batch\n", "Epoch: 8/20... Training Step: 3335... Training loss: 1.0959... 0.1158 sec/batch\n", "Epoch: 8/20... Training Step: 3336... Training loss: 1.2039... 0.1217 sec/batch\n", "Epoch: 8/20... Training Step: 3337... Training loss: 1.3188... 0.1202 sec/batch\n", "Epoch: 8/20... Training Step: 3338... Training loss: 1.2181... 0.1146 sec/batch\n", "Epoch: 8/20... Training Step: 3339... Training loss: 1.3140... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3340... Training loss: 1.3705... 0.1164 sec/batch\n", "Epoch: 8/20... Training Step: 3341... Training loss: 1.0456... 0.1144 sec/batch\n", "Epoch: 8/20... Training Step: 3342... Training loss: 1.3037... 0.1155 sec/batch\n", "Epoch: 8/20... Training Step: 3343... Training loss: 1.2118... 0.1194 sec/batch\n", "Epoch: 8/20... Training Step: 3344... Training loss: 1.2380... 0.1107 sec/batch\n", "Epoch: 8/20... Training Step: 3345... Training loss: 1.4454... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3346... Training loss: 1.3446... 0.1160 sec/batch\n", "Epoch: 8/20... Training Step: 3347... Training loss: 1.3090... 0.1182 sec/batch\n", "Epoch: 8/20... Training Step: 3348... Training loss: 1.1442... 0.1187 sec/batch\n", "Epoch: 8/20... Training Step: 3349... Training loss: 1.2476... 0.1221 sec/batch\n", "Epoch: 8/20... Training Step: 3350... Training loss: 1.2957... 0.1176 sec/batch\n", "Epoch: 8/20... Training Step: 3351... Training loss: 1.4406... 0.1182 sec/batch\n", "Epoch: 8/20... Training Step: 3352... Training loss: 1.1937... 0.1184 sec/batch\n", "Epoch: 8/20... Training Step: 3353... 
Training loss: 1.3844... 0.1179 sec/batch\n", "Epoch: 8/20... Training Step: 3354... Training loss: 1.4356... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3355... Training loss: 1.2225... 0.1209 sec/batch\n", "Epoch: 8/20... Training Step: 3356... Training loss: 1.2872... 0.1239 sec/batch\n", "Epoch: 8/20... Training Step: 3357... Training loss: 1.2375... 0.1123 sec/batch\n", "Epoch: 8/20... Training Step: 3358... Training loss: 1.1250... 0.1192 sec/batch\n", "Epoch: 8/20... Training Step: 3359... Training loss: 1.2729... 0.1184 sec/batch\n", "Epoch: 8/20... Training Step: 3360... Training loss: 1.1384... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3361... Training loss: 1.2029... 0.1215 sec/batch\n", "Epoch: 8/20... Training Step: 3362... Training loss: 1.3879... 0.1156 sec/batch\n", "Epoch: 8/20... Training Step: 3363... Training loss: 1.2279... 0.1147 sec/batch\n", "Epoch: 8/20... Training Step: 3364... Training loss: 1.2931... 0.1166 sec/batch\n", "Epoch: 8/20... Training Step: 3365... Training loss: 1.2731... 0.1220 sec/batch\n", "Epoch: 8/20... Training Step: 3366... Training loss: 1.3373... 0.1222 sec/batch\n", "Epoch: 8/20... Training Step: 3367... Training loss: 1.1943... 0.1142 sec/batch\n", "Epoch: 8/20... Training Step: 3368... Training loss: 1.0630... 0.1221 sec/batch\n", "Epoch: 8/20... Training Step: 3369... Training loss: 1.2677... 0.1182 sec/batch\n", "Epoch: 8/20... Training Step: 3370... Training loss: 1.2003... 0.1140 sec/batch\n", "Epoch: 8/20... Training Step: 3371... Training loss: 1.2572... 0.1197 sec/batch\n", "Epoch: 8/20... Training Step: 3372... Training loss: 1.2582... 0.1141 sec/batch\n", "Epoch: 8/20... Training Step: 3373... Training loss: 1.2401... 0.1205 sec/batch\n", "Epoch: 8/20... Training Step: 3374... Training loss: 1.0791... 0.1166 sec/batch\n", "Epoch: 8/20... Training Step: 3375... Training loss: 1.0402... 0.1176 sec/batch\n", "Epoch: 8/20... Training Step: 3376... Training loss: 1.3166... 
0.1194 sec/batch\n", "Epoch: 8/20... Training Step: 3377... Training loss: 1.2331... 0.1151 sec/batch\n", "Epoch: 8/20... Training Step: 3378... Training loss: 1.2786... 0.1227 sec/batch\n", "Epoch: 8/20... Training Step: 3379... Training loss: 1.5033... 0.1166 sec/batch\n", "Epoch: 8/20... Training Step: 3380... Training loss: 1.2407... 0.1162 sec/batch\n", "Epoch: 8/20... Training Step: 3381... Training loss: 1.2283... 0.1203 sec/batch\n", "Epoch: 8/20... Training Step: 3382... Training loss: 1.3507... 0.1160 sec/batch\n", "Epoch: 8/20... Training Step: 3383... Training loss: 1.0793... 0.1169 sec/batch\n", "Epoch: 8/20... Training Step: 3384... Training loss: 0.9898... 0.1251 sec/batch\n", "Epoch: 8/20... Training Step: 3385... Training loss: 1.0367... 0.1183 sec/batch\n", "Epoch: 8/20... Training Step: 3386... Training loss: 1.2980... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3387... Training loss: 1.1272... 0.1252 sec/batch\n", "Epoch: 8/20... Training Step: 3388... Training loss: 1.1748... 0.1325 sec/batch\n", "Epoch: 8/20... Training Step: 3389... Training loss: 1.0833... 0.1297 sec/batch\n", "Epoch: 8/20... Training Step: 3390... Training loss: 1.0836... 0.1156 sec/batch\n", "Epoch: 8/20... Training Step: 3391... Training loss: 1.0377... 0.1194 sec/batch\n", "Epoch: 8/20... Training Step: 3392... Training loss: 1.2196... 0.1162 sec/batch\n", "Epoch: 8/20... Training Step: 3393... Training loss: 1.2394... 0.1141 sec/batch\n", "Epoch: 8/20... Training Step: 3394... Training loss: 1.1393... 0.1160 sec/batch\n", "Epoch: 8/20... Training Step: 3395... Training loss: 1.1310... 0.1150 sec/batch\n", "Epoch: 8/20... Training Step: 3396... Training loss: 1.0669... 0.1172 sec/batch\n", "Epoch: 8/20... Training Step: 3397... Training loss: 1.3027... 0.1176 sec/batch\n", "Epoch: 8/20... Training Step: 3398... Training loss: 1.3562... 0.1191 sec/batch\n", "Epoch: 8/20... Training Step: 3399... Training loss: 1.2296... 0.1191 sec/batch\n", "Epoch: 8/20... 
Training Step: 3400... Training loss: 1.1442... 0.1155 sec/batch\n", "Epoch: 8/20... Training Step: 3401... Training loss: 1.4094... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3402... Training loss: 1.2341... 0.1195 sec/batch\n", "Epoch: 8/20... Training Step: 3403... Training loss: 1.1850... 0.1129 sec/batch\n", "Epoch: 8/20... Training Step: 3404... Training loss: 1.2147... 0.1159 sec/batch\n", "Epoch: 8/20... Training Step: 3405... Training loss: 1.1028... 0.1141 sec/batch\n", "Epoch: 8/20... Training Step: 3406... Training loss: 1.2325... 0.1124 sec/batch\n", "Epoch: 8/20... Training Step: 3407... Training loss: 1.0698... 0.1157 sec/batch\n", "Epoch: 8/20... Training Step: 3408... Training loss: 1.1489... 0.1192 sec/batch\n", "Epoch: 8/20... Training Step: 3409... Training loss: 1.3284... 0.1172 sec/batch\n", "Epoch: 8/20... Training Step: 3410... Training loss: 1.1731... 0.1156 sec/batch\n", "Epoch: 8/20... Training Step: 3411... Training loss: 1.4068... 0.1136 sec/batch\n", "Epoch: 8/20... Training Step: 3412... Training loss: 1.1216... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3413... Training loss: 1.3087... 0.1169 sec/batch\n", "Epoch: 8/20... Training Step: 3414... Training loss: 1.1461... 0.1145 sec/batch\n", "Epoch: 8/20... Training Step: 3415... Training loss: 1.1127... 0.1203 sec/batch\n", "Epoch: 8/20... Training Step: 3416... Training loss: 1.2975... 0.1223 sec/batch\n", "Epoch: 8/20... Training Step: 3417... Training loss: 1.1204... 0.1156 sec/batch\n", "Epoch: 8/20... Training Step: 3418... Training loss: 1.3186... 0.1180 sec/batch\n", "Epoch: 8/20... Training Step: 3419... Training loss: 1.2813... 0.1183 sec/batch\n", "Epoch: 8/20... Training Step: 3420... Training loss: 1.4530... 0.1157 sec/batch\n", "Epoch: 8/20... Training Step: 3421... Training loss: 1.1071... 0.1128 sec/batch\n", "Epoch: 8/20... Training Step: 3422... Training loss: 1.1599... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3423... 
Training loss: 1.3531... 0.1203 sec/batch\n", "Epoch: 8/20... Training Step: 3424... Training loss: 1.0668... 0.1200 sec/batch\n", "Epoch: 8/20... Training Step: 3425... Training loss: 1.0775... 0.1166 sec/batch\n", "Epoch: 8/20... Training Step: 3426... Training loss: 1.2885... 0.1202 sec/batch\n", "Epoch: 8/20... Training Step: 3427... Training loss: 1.0786... 0.1200 sec/batch\n", "Epoch: 8/20... Training Step: 3428... Training loss: 1.2705... 0.1191 sec/batch\n", "Epoch: 8/20... Training Step: 3429... Training loss: 1.0824... 0.1166 sec/batch\n", "Epoch: 8/20... Training Step: 3430... Training loss: 1.3983... 0.1146 sec/batch\n", "Epoch: 8/20... Training Step: 3431... Training loss: 1.2709... 0.1192 sec/batch\n", "Epoch: 8/20... Training Step: 3432... Training loss: 1.1623... 0.1233 sec/batch\n", "Epoch: 8/20... Training Step: 3433... Training loss: 1.3321... 0.1226 sec/batch\n", "Epoch: 8/20... Training Step: 3434... Training loss: 1.2467... 0.1234 sec/batch\n", "Epoch: 8/20... Training Step: 3435... Training loss: 1.3658... 0.1319 sec/batch\n", "Epoch: 8/20... Training Step: 3436... Training loss: 0.9808... 0.1213 sec/batch\n", "Epoch: 8/20... Training Step: 3437... Training loss: 1.2422... 0.1171 sec/batch\n", "Epoch: 8/20... Training Step: 3438... Training loss: 1.1700... 0.1191 sec/batch\n", "Epoch: 8/20... Training Step: 3439... Training loss: 1.1619... 0.1185 sec/batch\n", "Epoch: 8/20... Training Step: 3440... Training loss: 1.3323... 0.1220 sec/batch\n", "Epoch: 8/20... Training Step: 3441... Training loss: 1.2559... 0.1192 sec/batch\n", "Epoch: 8/20... Training Step: 3442... Training loss: 1.2831... 0.1157 sec/batch\n", "Epoch: 8/20... Training Step: 3443... Training loss: 1.2767... 0.1179 sec/batch\n", "Epoch: 8/20... Training Step: 3444... Training loss: 1.2116... 0.1246 sec/batch\n", "Epoch: 8/20... Training Step: 3445... Training loss: 1.0955... 0.1359 sec/batch\n", "Epoch: 8/20... Training Step: 3446... Training loss: 1.2556... 
0.1365 sec/batch\n", "Epoch: 8/20... Training Step: 3447... Training loss: 0.9638... 0.1266 sec/batch\n", "Epoch: 8/20... Training Step: 3448... Training loss: 1.2205... 0.1248 sec/batch\n", "Epoch: 8/20... Training Step: 3449... Training loss: 1.1820... 0.1213 sec/batch\n", "Epoch: 8/20... Training Step: 3450... Training loss: 1.2127... 0.1149 sec/batch\n", "Epoch: 8/20... Training Step: 3451... Training loss: 1.1697... 0.1189 sec/batch\n", "Epoch: 8/20... Training Step: 3452... Training loss: 1.2831... 0.1224 sec/batch\n", "Epoch: 8/20... Training Step: 3453... Training loss: 1.1755... 0.1345 sec/batch\n", "Epoch: 8/20... Training Step: 3454... Training loss: 1.0929... 0.1398 sec/batch\n", "Epoch: 8/20... Training Step: 3455... Training loss: 1.1790... 0.1191 sec/batch\n", "Epoch: 8/20... Training Step: 3456... Training loss: 1.2462... 0.1178 sec/batch\n", "Epoch: 8/20... Training Step: 3457... Training loss: 1.2278... 0.1186 sec/batch\n", "Epoch: 8/20... Training Step: 3458... Training loss: 1.0611... 0.1182 sec/batch\n", "Epoch: 8/20... Training Step: 3459... Training loss: 1.0867... 0.1194 sec/batch\n", "Epoch: 8/20... Training Step: 3460... Training loss: 1.2865... 0.1179 sec/batch\n", "Epoch: 8/20... Training Step: 3461... Training loss: 1.3636... 0.1198 sec/batch\n", "Epoch: 8/20... Training Step: 3462... Training loss: 1.1519... 0.1191 sec/batch\n", "Epoch: 8/20... Training Step: 3463... Training loss: 1.3023... 0.1180 sec/batch\n", "Epoch: 8/20... Training Step: 3464... Training loss: 1.2019... 0.1164 sec/batch\n", "Epoch: 8/20... Training Step: 3465... Training loss: 1.2683... 0.1208 sec/batch\n", "Epoch: 8/20... Training Step: 3466... Training loss: 1.1807... 0.1189 sec/batch\n", "Epoch: 8/20... Training Step: 3467... Training loss: 1.4081... 0.1180 sec/batch\n", "Epoch: 8/20... Training Step: 3468... Training loss: 1.1620... 0.1172 sec/batch\n", "Epoch: 8/20... Training Step: 3469... Training loss: 1.1256... 0.1175 sec/batch\n", "Epoch: 8/20... 
Training Step: 3470... Training loss: 1.5012... 0.1190 sec/batch\n", "Epoch: 8/20... Training Step: 3471... Training loss: 1.3165... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3472... Training loss: 1.5198... 0.1152 sec/batch\n", "Epoch: 8/20... Training Step: 3473... Training loss: 1.2766... 0.1203 sec/batch\n", "Epoch: 8/20... Training Step: 3474... Training loss: 1.3748... 0.1187 sec/batch\n", "Epoch: 8/20... Training Step: 3475... Training loss: 1.4035... 0.1136 sec/batch\n", "Epoch: 8/20... Training Step: 3476... Training loss: 1.1263... 0.1214 sec/batch\n", "Epoch: 8/20... Training Step: 3477... Training loss: 1.2199... 0.1226 sec/batch\n", "Epoch: 8/20... Training Step: 3478... Training loss: 1.1208... 0.1265 sec/batch\n", "Epoch: 8/20... Training Step: 3479... Training loss: 1.2814... 0.1194 sec/batch\n", "Epoch: 8/20... Training Step: 3480... Training loss: 1.1796... 0.1273 sec/batch\n", "Epoch: 8/20... Training Step: 3481... Training loss: 1.4161... 0.1184 sec/batch\n", "Epoch: 8/20... Training Step: 3482... Training loss: 1.1744... 0.1181 sec/batch\n", "Epoch: 8/20... Training Step: 3483... Training loss: 1.4632... 0.1153 sec/batch\n", "Epoch: 8/20... Training Step: 3484... Training loss: 1.2591... 0.1173 sec/batch\n", "Epoch: 8/20... Training Step: 3485... Training loss: 1.4350... 0.1200 sec/batch\n", "Epoch: 8/20... Training Step: 3486... Training loss: 0.9963... 0.1181 sec/batch\n", "Epoch: 8/20... Training Step: 3487... Training loss: 1.2543... 0.1195 sec/batch\n", "Epoch: 8/20... Training Step: 3488... Training loss: 1.2731... 0.1210 sec/batch\n", "Epoch: 8/20... Training Step: 3489... Training loss: 1.2142... 0.1184 sec/batch\n", "Epoch: 8/20... Training Step: 3490... Training loss: 1.1756... 0.1190 sec/batch\n", "Epoch: 8/20... Training Step: 3491... Training loss: 1.4179... 0.1163 sec/batch\n", "Epoch: 8/20... Training Step: 3492... Training loss: 1.2849... 0.1125 sec/batch\n", "Epoch: 8/20... Training Step: 3493... 
Training loss: 1.2010... 0.1098 sec/batch\n", "Epoch: 8/20... Training Step: 3494... Training loss: 1.1039... 0.1154 sec/batch\n", "Epoch: 8/20... Training Step: 3495... Training loss: 1.1747... 0.1117 sec/batch\n", "Epoch: 8/20... Training Step: 3496... Training loss: 1.3154... 0.1163 sec/batch\n", "Epoch: 8/20... Training Step: 3497... Training loss: 1.1856... 0.1119 sec/batch\n", "Epoch: 8/20... Training Step: 3498... Training loss: 1.2362... 0.1163 sec/batch\n", "Epoch: 8/20... Training Step: 3499... Training loss: 1.5139... 0.1151 sec/batch\n", "Epoch: 8/20... Training Step: 3500... Training loss: 1.1847... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3501... Training loss: 1.1698... 0.1172 sec/batch\n", "Epoch: 8/20... Training Step: 3502... Training loss: 1.1883... 0.1144 sec/batch\n", "Epoch: 8/20... Training Step: 3503... Training loss: 1.2426... 0.1173 sec/batch\n", "Epoch: 8/20... Training Step: 3504... Training loss: 1.3160... 0.1121 sec/batch\n", "Epoch: 8/20... Training Step: 3505... Training loss: 1.4383... 0.1169 sec/batch\n", "Epoch: 8/20... Training Step: 3506... Training loss: 1.0693... 0.1270 sec/batch\n", "Epoch: 8/20... Training Step: 3507... Training loss: 1.0964... 0.1257 sec/batch\n", "Epoch: 8/20... Training Step: 3508... Training loss: 1.1918... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3509... Training loss: 1.2756... 0.1229 sec/batch\n", "Epoch: 8/20... Training Step: 3510... Training loss: 1.3913... 0.1305 sec/batch\n", "Epoch: 8/20... Training Step: 3511... Training loss: 1.1669... 0.1171 sec/batch\n", "Epoch: 8/20... Training Step: 3512... Training loss: 1.2816... 0.1140 sec/batch\n", "Epoch: 8/20... Training Step: 3513... Training loss: 1.3139... 0.1111 sec/batch\n", "Epoch: 8/20... Training Step: 3514... Training loss: 1.2217... 0.1222 sec/batch\n", "Epoch: 8/20... Training Step: 3515... Training loss: 1.5079... 0.1138 sec/batch\n", "Epoch: 8/20... Training Step: 3516... Training loss: 1.3340... 
0.1185 sec/batch\n", "Epoch: 8/20... Training Step: 3517... Training loss: 1.3063... 0.1154 sec/batch\n", "Epoch: 8/20... Training Step: 3518... Training loss: 1.3483... 0.1195 sec/batch\n", "Epoch: 8/20... Training Step: 3519... Training loss: 1.2842... 0.1171 sec/batch\n", "Epoch: 8/20... Training Step: 3520... Training loss: 1.4193... 0.1161 sec/batch\n", "Epoch: 8/20... Training Step: 3521... Training loss: 1.3003... 0.1192 sec/batch\n", "Epoch: 8/20... Training Step: 3522... Training loss: 1.2223... 0.1152 sec/batch\n", "Epoch: 8/20... Training Step: 3523... Training loss: 1.3015... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3524... Training loss: 1.1168... 0.1178 sec/batch\n", "Epoch: 8/20... Training Step: 3525... Training loss: 1.0948... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3526... Training loss: 1.3710... 0.1122 sec/batch\n", "Epoch: 8/20... Training Step: 3527... Training loss: 1.2253... 0.1171 sec/batch\n", "Epoch: 8/20... Training Step: 3528... Training loss: 1.2259... 0.1178 sec/batch\n", "Epoch: 8/20... Training Step: 3529... Training loss: 1.2264... 0.1164 sec/batch\n", "Epoch: 8/20... Training Step: 3530... Training loss: 1.1534... 0.1144 sec/batch\n", "Epoch: 8/20... Training Step: 3531... Training loss: 1.1764... 0.1188 sec/batch\n", "Epoch: 8/20... Training Step: 3532... Training loss: 1.2178... 0.1173 sec/batch\n", "Epoch: 8/20... Training Step: 3533... Training loss: 1.0923... 0.1189 sec/batch\n", "Epoch: 8/20... Training Step: 3534... Training loss: 1.2170... 0.1181 sec/batch\n", "Epoch: 8/20... Training Step: 3535... Training loss: 1.2378... 0.1183 sec/batch\n", "Epoch: 8/20... Training Step: 3536... Training loss: 1.2721... 0.1173 sec/batch\n", "Epoch: 8/20... Training Step: 3537... Training loss: 1.2870... 0.1180 sec/batch\n", "Epoch: 8/20... Training Step: 3538... Training loss: 1.2874... 0.1183 sec/batch\n", "Epoch: 8/20... Training Step: 3539... Training loss: 1.1355... 0.1198 sec/batch\n", "Epoch: 8/20... 
Training Step: 3540... Training loss: 1.1787... 0.1180 sec/batch\n", "Epoch: 8/20... Training Step: 3541... Training loss: 1.1525... 0.1142 sec/batch\n", "Epoch: 8/20... Training Step: 3542... Training loss: 1.1899... 0.1112 sec/batch\n", "Epoch: 8/20... Training Step: 3543... Training loss: 1.2645... 0.1140 sec/batch\n", "Epoch: 8/20... Training Step: 3544... Training loss: 1.4336... 0.1206 sec/batch\n", "Epoch: 8/20... Training Step: 3545... Training loss: 1.1304... 0.1188 sec/batch\n", "Epoch: 8/20... Training Step: 3546... Training loss: 1.1376... 0.1143 sec/batch\n", "Epoch: 8/20... Training Step: 3547... Training loss: 1.2392... 0.1174 sec/batch\n", "Epoch: 8/20... Training Step: 3548... Training loss: 1.1892... 0.1174 sec/batch\n", "Epoch: 8/20... Training Step: 3549... Training loss: 1.2131... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3550... Training loss: 1.1614... 0.1202 sec/batch\n", "Epoch: 8/20... Training Step: 3551... Training loss: 1.0127... 0.1153 sec/batch\n", "Epoch: 8/20... Training Step: 3552... Training loss: 1.3428... 0.1146 sec/batch\n", "Epoch: 8/20... Training Step: 3553... Training loss: 1.1150... 0.1158 sec/batch\n", "Epoch: 8/20... Training Step: 3554... Training loss: 1.2468... 0.1163 sec/batch\n", "Epoch: 8/20... Training Step: 3555... Training loss: 1.2675... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3556... Training loss: 1.5856... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3557... Training loss: 1.4071... 0.1190 sec/batch\n", "Epoch: 8/20... Training Step: 3558... Training loss: 1.3550... 0.1233 sec/batch\n", "Epoch: 8/20... Training Step: 3559... Training loss: 1.2834... 0.1153 sec/batch\n", "Epoch: 8/20... Training Step: 3560... Training loss: 1.1250... 0.1198 sec/batch\n", "Epoch: 8/20... Training Step: 3561... Training loss: 1.2427... 0.1160 sec/batch\n", "Epoch: 8/20... Training Step: 3562... Training loss: 1.2434... 0.1154 sec/batch\n", "Epoch: 8/20... Training Step: 3563... 
Training loss: 0.9807... 0.1163 sec/batch\n", "Epoch: 8/20... Training Step: 3564... Training loss: 1.0360... 0.1152 sec/batch\n", "Epoch: 8/20... Training Step: 3565... Training loss: 1.1987... 0.1189 sec/batch\n", "Epoch: 8/20... Training Step: 3566... Training loss: 1.1793... 0.1219 sec/batch\n", "Epoch: 8/20... Training Step: 3567... Training loss: 1.1244... 0.1164 sec/batch\n", "Epoch: 8/20... Training Step: 3568... Training loss: 1.1163... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3569... Training loss: 1.1717... 0.1178 sec/batch\n", "Epoch: 8/20... Training Step: 3570... Training loss: 1.3050... 0.1201 sec/batch\n", "Epoch: 8/20... Training Step: 3571... Training loss: 1.0987... 0.1217 sec/batch\n", "Epoch: 8/20... Training Step: 3572... Training loss: 1.1090... 0.1172 sec/batch\n", "Epoch: 8/20... Training Step: 3573... Training loss: 1.0227... 0.1201 sec/batch\n", "Epoch: 8/20... Training Step: 3574... Training loss: 1.0322... 0.1216 sec/batch\n", "Epoch: 8/20... Training Step: 3575... Training loss: 1.2717... 0.1207 sec/batch\n", "Epoch: 8/20... Training Step: 3576... Training loss: 1.1401... 0.1138 sec/batch\n", "Epoch: 8/20... Training Step: 3577... Training loss: 1.3145... 0.1148 sec/batch\n", "Epoch: 8/20... Training Step: 3578... Training loss: 1.0755... 0.1157 sec/batch\n", "Epoch: 8/20... Training Step: 3579... Training loss: 1.2717... 0.1190 sec/batch\n", "Epoch: 8/20... Training Step: 3580... Training loss: 1.2385... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3581... Training loss: 1.0678... 0.1204 sec/batch\n", "Epoch: 8/20... Training Step: 3582... Training loss: 1.1512... 0.1179 sec/batch\n", "Epoch: 8/20... Training Step: 3583... Training loss: 1.3370... 0.1214 sec/batch\n", "Epoch: 8/20... Training Step: 3584... Training loss: 1.1858... 0.1171 sec/batch\n", "Epoch: 8/20... Training Step: 3585... Training loss: 1.2930... 0.1167 sec/batch\n", "Epoch: 8/20... Training Step: 3586... Training loss: 1.0456... 
0.1160 sec/batch\n", "Epoch: 8/20... Training Step: 3587... Training loss: 1.2738... 0.1191 sec/batch\n", "Epoch: 8/20... Training Step: 3588... Training loss: 1.0772... 0.1214 sec/batch\n", "Epoch: 8/20... Training Step: 3589... Training loss: 1.1081... 0.1137 sec/batch\n", "Epoch: 8/20... Training Step: 3590... Training loss: 1.2896... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3591... Training loss: 1.0592... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3592... Training loss: 1.3552... 0.1204 sec/batch\n", "Epoch: 8/20... Training Step: 3593... Training loss: 1.1708... 0.1133 sec/batch\n", "Epoch: 8/20... Training Step: 3594... Training loss: 0.9883... 0.1209 sec/batch\n", "Epoch: 8/20... Training Step: 3595... Training loss: 1.0935... 0.1181 sec/batch\n", "Epoch: 8/20... Training Step: 3596... Training loss: 1.6098... 0.1176 sec/batch\n", "Epoch: 8/20... Training Step: 3597... Training loss: 1.1201... 0.1186 sec/batch\n", "Epoch: 8/20... Training Step: 3598... Training loss: 1.2033... 0.1153 sec/batch\n", "Epoch: 8/20... Training Step: 3599... Training loss: 1.2012... 0.1186 sec/batch\n", "Epoch: 8/20... Training Step: 3600... Training loss: 1.0117... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3601... Training loss: 0.9879... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3602... Training loss: 0.9650... 0.1158 sec/batch\n", "Epoch: 8/20... Training Step: 3603... Training loss: 1.2634... 0.1162 sec/batch\n", "Epoch: 8/20... Training Step: 3604... Training loss: 1.0917... 0.1174 sec/batch\n", "Epoch: 8/20... Training Step: 3605... Training loss: 1.0822... 0.1157 sec/batch\n", "Epoch: 8/20... Training Step: 3606... Training loss: 1.1990... 0.1204 sec/batch\n", "Epoch: 8/20... Training Step: 3607... Training loss: 1.2827... 0.1208 sec/batch\n", "Epoch: 8/20... Training Step: 3608... Training loss: 0.9170... 0.1204 sec/batch\n", "Epoch: 8/20... Training Step: 3609... Training loss: 1.3381... 0.1163 sec/batch\n", "Epoch: 8/20... 
Training Step: 3610... Training loss: 1.2600... 0.1138 sec/batch\n", "Epoch: 8/20... Training Step: 3611... Training loss: 1.1378... 0.1143 sec/batch\n", "Epoch: 8/20... Training Step: 3612... Training loss: 1.0712... 0.1182 sec/batch\n", "Epoch: 8/20... Training Step: 3613... Training loss: 1.0983... 0.1206 sec/batch\n", "Epoch: 8/20... Training Step: 3614... Training loss: 1.2849... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3615... Training loss: 1.1254... 0.1195 sec/batch\n", "Epoch: 8/20... Training Step: 3616... Training loss: 1.4476... 0.1210 sec/batch\n", "Epoch: 8/20... Training Step: 3617... Training loss: 1.2753... 0.1178 sec/batch\n", "Epoch: 8/20... Training Step: 3618... Training loss: 1.1892... 0.1174 sec/batch\n", "Epoch: 8/20... Training Step: 3619... Training loss: 1.0789... 0.1158 sec/batch\n", "Epoch: 8/20... Training Step: 3620... Training loss: 1.3881... 0.1132 sec/batch\n", "Epoch: 8/20... Training Step: 3621... Training loss: 1.1967... 0.1170 sec/batch\n", "Epoch: 8/20... Training Step: 3622... Training loss: 1.2696... 0.1197 sec/batch\n", "Epoch: 8/20... Training Step: 3623... Training loss: 1.2152... 0.1213 sec/batch\n", "Epoch: 8/20... Training Step: 3624... Training loss: 1.3095... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3625... Training loss: 1.3215... 0.1202 sec/batch\n", "Epoch: 8/20... Training Step: 3626... Training loss: 1.3250... 0.1208 sec/batch\n", "Epoch: 8/20... Training Step: 3627... Training loss: 1.1718... 0.1196 sec/batch\n", "Epoch: 8/20... Training Step: 3628... Training loss: 1.2460... 0.1194 sec/batch\n", "Epoch: 8/20... Training Step: 3629... Training loss: 1.1353... 0.1238 sec/batch\n", "Epoch: 8/20... Training Step: 3630... Training loss: 1.1156... 0.1199 sec/batch\n", "Epoch: 8/20... Training Step: 3631... Training loss: 1.0353... 0.1179 sec/batch\n", "Epoch: 8/20... Training Step: 3632... Training loss: 1.1766... 0.1181 sec/batch\n", "Epoch: 8/20... Training Step: 3633... 
Training loss: 1.1169... 0.1146 sec/batch\n", "Epoch: 8/20... Training Step: 3634... Training loss: 1.3121... 0.1113 sec/batch\n", "Epoch: 8/20... Training Step: 3635... Training loss: 1.1549... 0.1145 sec/batch\n", "Epoch: 8/20... Training Step: 3636... Training loss: 1.0469... 0.1205 sec/batch\n", "Epoch: 8/20... Training Step: 3637... Training loss: 1.0398... 0.1293 sec/batch\n", "Epoch: 8/20... Training Step: 3638... Training loss: 1.1428... 0.1231 sec/batch\n", "Epoch: 8/20... Training Step: 3639... Training loss: 1.0695... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3640... Training loss: 1.1162... 0.1140 sec/batch\n", "Epoch: 8/20... Training Step: 3641... Training loss: 1.1384... 0.1146 sec/batch\n", "Epoch: 8/20... Training Step: 3642... Training loss: 1.1871... 0.1163 sec/batch\n", "Epoch: 8/20... Training Step: 3643... Training loss: 1.0643... 0.1192 sec/batch\n", "Epoch: 8/20... Training Step: 3644... Training loss: 1.3138... 0.1176 sec/batch\n", "Epoch: 8/20... Training Step: 3645... Training loss: 1.1776... 0.1201 sec/batch\n", "Epoch: 8/20... Training Step: 3646... Training loss: 1.1316... 0.1171 sec/batch\n", "Epoch: 8/20... Training Step: 3647... Training loss: 1.1926... 0.1089 sec/batch\n", "Epoch: 8/20... Training Step: 3648... Training loss: 1.1154... 0.1156 sec/batch\n", "Epoch: 8/20... Training Step: 3649... Training loss: 1.2040... 0.1131 sec/batch\n", "Epoch: 8/20... Training Step: 3650... Training loss: 1.0210... 0.1150 sec/batch\n", "Epoch: 8/20... Training Step: 3651... Training loss: 1.0906... 0.1159 sec/batch\n", "Epoch: 8/20... Training Step: 3652... Training loss: 1.3195... 0.1149 sec/batch\n", "Epoch: 8/20... Training Step: 3653... Training loss: 1.1210... 0.1115 sec/batch\n", "Epoch: 8/20... Training Step: 3654... Training loss: 1.5398... 0.1153 sec/batch\n", "Epoch: 8/20... Training Step: 3655... Training loss: 1.1332... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3656... Training loss: 1.5540... 
0.1148 sec/batch\n", "Epoch: 8/20... Training Step: 3657... Training loss: 1.2730... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3658... Training loss: 1.1283... 0.1135 sec/batch\n", "Epoch: 8/20... Training Step: 3659... Training loss: 1.0646... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3660... Training loss: 1.1104... 0.1187 sec/batch\n", "Epoch: 8/20... Training Step: 3661... Training loss: 1.2510... 0.1231 sec/batch\n", "Epoch: 8/20... Training Step: 3662... Training loss: 1.2064... 0.1154 sec/batch\n", "Epoch: 8/20... Training Step: 3663... Training loss: 1.3209... 0.1128 sec/batch\n", "Epoch: 8/20... Training Step: 3664... Training loss: 1.3347... 0.1207 sec/batch\n", "Epoch: 8/20... Training Step: 3665... Training loss: 1.3407... 0.1186 sec/batch\n", "Epoch: 8/20... Training Step: 3666... Training loss: 1.0755... 0.1167 sec/batch\n", "Epoch: 8/20... Training Step: 3667... Training loss: 1.2830... 0.1142 sec/batch\n", "Epoch: 8/20... Training Step: 3668... Training loss: 1.0141... 0.1163 sec/batch\n", "Epoch: 8/20... Training Step: 3669... Training loss: 1.2766... 0.1169 sec/batch\n", "Epoch: 8/20... Training Step: 3670... Training loss: 1.3100... 0.1188 sec/batch\n", "Epoch: 8/20... Training Step: 3671... Training loss: 1.3143... 0.1161 sec/batch\n", "Epoch: 8/20... Training Step: 3672... Training loss: 1.4252... 0.1194 sec/batch\n", "Epoch: 8/20... Training Step: 3673... Training loss: 1.2260... 0.1214 sec/batch\n", "Epoch: 8/20... Training Step: 3674... Training loss: 1.2051... 0.1179 sec/batch\n", "Epoch: 8/20... Training Step: 3675... Training loss: 1.1938... 0.1156 sec/batch\n", "Epoch: 8/20... Training Step: 3676... Training loss: 1.2538... 0.1196 sec/batch\n", "Epoch: 8/20... Training Step: 3677... Training loss: 1.0626... 0.1137 sec/batch\n", "Epoch: 8/20... Training Step: 3678... Training loss: 1.2141... 0.1169 sec/batch\n", "Epoch: 8/20... Training Step: 3679... Training loss: 1.3506... 0.1172 sec/batch\n", "Epoch: 8/20... 
Training Step: 3680... Training loss: 1.3112... 0.1135 sec/batch\n", "Epoch: 8/20... Training Step: 3681... Training loss: 1.4658... 0.1160 sec/batch\n", "Epoch: 8/20... Training Step: 3682... Training loss: 1.2654... 0.1189 sec/batch\n", "Epoch: 8/20... Training Step: 3683... Training loss: 1.0896... 0.1183 sec/batch\n", "Epoch: 8/20... Training Step: 3684... Training loss: 1.2297... 0.1165 sec/batch\n", "Epoch: 8/20... Training Step: 3685... Training loss: 1.2225... 0.1174 sec/batch\n", "Epoch: 8/20... Training Step: 3686... Training loss: 1.2966... 0.1227 sec/batch\n", "Epoch: 8/20... Training Step: 3687... Training loss: 1.3977... 0.1146 sec/batch\n", "Epoch: 8/20... Training Step: 3688... Training loss: 1.4859... 0.1101 sec/batch\n", "Epoch: 8/20... Training Step: 3689... Training loss: 1.0861... 0.1148 sec/batch\n", "Epoch: 8/20... Training Step: 3690... Training loss: 1.1856... 0.1181 sec/batch\n", "Epoch: 8/20... Training Step: 3691... Training loss: 1.2494... 0.1175 sec/batch\n", "Epoch: 8/20... Training Step: 3692... Training loss: 1.2289... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3693... Training loss: 1.2114... 0.1172 sec/batch\n", "Epoch: 8/20... Training Step: 3694... Training loss: 1.1489... 0.1221 sec/batch\n", "Epoch: 8/20... Training Step: 3695... Training loss: 1.3059... 0.1174 sec/batch\n", "Epoch: 8/20... Training Step: 3696... Training loss: 1.1364... 0.1172 sec/batch\n", "Epoch: 8/20... Training Step: 3697... Training loss: 1.2428... 0.1161 sec/batch\n", "Epoch: 8/20... Training Step: 3698... Training loss: 1.2286... 0.1239 sec/batch\n", "Epoch: 8/20... Training Step: 3699... Training loss: 1.1761... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3700... Training loss: 1.2923... 0.1169 sec/batch\n", "Epoch: 8/20... Training Step: 3701... Training loss: 1.0435... 0.1196 sec/batch\n", "Epoch: 8/20... Training Step: 3702... Training loss: 1.4672... 0.1187 sec/batch\n", "Epoch: 8/20... Training Step: 3703... 
Training loss: 1.2033... 0.1142 sec/batch\n", "Epoch: 8/20... Training Step: 3704... Training loss: 1.0732... 0.1128 sec/batch\n", "Epoch: 8/20... Training Step: 3705... Training loss: 1.1276... 0.1180 sec/batch\n", "Epoch: 8/20... Training Step: 3706... Training loss: 1.0397... 0.1153 sec/batch\n", "Epoch: 8/20... Training Step: 3707... Training loss: 1.0802... 0.1177 sec/batch\n", "Epoch: 8/20... Training Step: 3708... Training loss: 1.2413... 0.1199 sec/batch\n", "Epoch: 8/20... Training Step: 3709... Training loss: 1.2985... 0.1155 sec/batch\n", "Epoch: 8/20... Training Step: 3710... Training loss: 1.1363... 0.1168 sec/batch\n", "Epoch: 8/20... Training Step: 3711... Training loss: 1.2168... 0.1142 sec/batch\n", "Epoch: 8/20... Training Step: 3712... Training loss: 1.1327... 0.1150 sec/batch\n", "Epoch: 9/20... Training Step: 3713... Training loss: 1.5320... 0.1194 sec/batch\n", "Epoch: 9/20... Training Step: 3714... Training loss: 1.2747... 0.1192 sec/batch\n", "Epoch: 9/20... Training Step: 3715... Training loss: 1.1972... 0.1171 sec/batch\n", "Epoch: 9/20... Training Step: 3716... Training loss: 1.2015... 0.1151 sec/batch\n", "Epoch: 9/20... Training Step: 3717... Training loss: 1.2670... 0.1165 sec/batch\n", "Epoch: 9/20... Training Step: 3718... Training loss: 1.0613... 0.1185 sec/batch\n", "Epoch: 9/20... Training Step: 3719... Training loss: 1.4628... 0.1173 sec/batch\n", "Epoch: 9/20... Training Step: 3720... Training loss: 1.1439... 0.1180 sec/batch\n", "Epoch: 9/20... Training Step: 3721... Training loss: 1.0168... 0.1175 sec/batch\n", "Epoch: 9/20... Training Step: 3722... Training loss: 1.2820... 0.1141 sec/batch\n", "Epoch: 9/20... Training Step: 3723... Training loss: 1.1704... 0.1134 sec/batch\n", "Epoch: 9/20... Training Step: 3724... Training loss: 1.0800... 0.1145 sec/batch\n", "Epoch: 9/20... Training Step: 3725... Training loss: 1.3321... 0.1200 sec/batch\n", "Epoch: 9/20... Training Step: 3726... Training loss: 0.9963... 
0.1175 sec/batch\n", "Epoch: 9/20... Training Step: 3727... Training loss: 1.1766... 0.1175 sec/batch\n", "Epoch: 9/20... Training Step: 3728... Training loss: 1.2556... 0.1185 sec/batch\n", "Epoch: 9/20... Training Step: 3729... Training loss: 1.1061... 0.1148 sec/batch\n", "Epoch: 9/20... Training Step: 3730... Training loss: 1.1364... 0.1182 sec/batch\n", "Epoch: 9/20... Training Step: 3731... Training loss: 1.2040... 0.1182 sec/batch\n", "Epoch: 9/20... Training Step: 3732... Training loss: 1.1218... 0.1174 sec/batch\n", "Epoch: 9/20... Training Step: 3733... Training loss: 1.2983... 0.1210 sec/batch\n", "Epoch: 9/20... Training Step: 3734... Training loss: 1.0552... 0.1157 sec/batch\n", "Epoch: 9/20... Training Step: 3735... Training loss: 1.2856... 0.1163 sec/batch\n", "Epoch: 9/20... Training Step: 3736... Training loss: 1.1001... 0.1184 sec/batch\n", "Epoch: 9/20... Training Step: 3737... Training loss: 1.1027... 0.1204 sec/batch\n", "Epoch: 9/20... Training Step: 3738... Training loss: 1.0988... 0.1155 sec/batch\n", "Epoch: 9/20... Training Step: 3739... Training loss: 1.3067... 0.1169 sec/batch\n", "Epoch: 9/20... Training Step: 3740... Training loss: 1.0097... 0.1214 sec/batch\n", "Epoch: 9/20... Training Step: 3741... Training loss: 1.0685... 0.1160 sec/batch\n", "Epoch: 9/20... Training Step: 3742... Training loss: 1.1552... 0.1122 sec/batch\n", "Epoch: 9/20... Training Step: 3743... Training loss: 1.0153... 0.1182 sec/batch\n", "Epoch: 9/20... Training Step: 3744... Training loss: 1.1073... 0.1149 sec/batch\n", "Epoch: 9/20... Training Step: 3745... Training loss: 1.0013... 0.1156 sec/batch\n", "Epoch: 9/20... Training Step: 3746... Training loss: 0.9218... 0.1199 sec/batch\n", "Epoch: 9/20... Training Step: 3747... Training loss: 1.0162... 0.1220 sec/batch\n", "Epoch: 9/20... Training Step: 3748... Training loss: 0.9840... 0.1172 sec/batch\n", "Epoch: 9/20... Training Step: 3749... Training loss: 1.1616... 0.1185 sec/batch\n", "Epoch: 9/20... 
Training Step: 3750... Training loss: 1.0463... 0.1174 sec/batch\n", "Epoch: 9/20... Training Step: 3751... Training loss: 0.9711... 0.1265 sec/batch\n", "Epoch: 9/20... Training Step: 3752... Training loss: 1.4550... 0.1207 sec/batch\n", "Epoch: 9/20... Training Step: 3753... Training loss: 1.1485... 0.1209 sec/batch\n", "Epoch: 9/20... Training Step: 3754... Training loss: 1.0232... 0.1188 sec/batch\n", "Epoch: 9/20... Training Step: 3755... Training loss: 1.3335... 0.1177 sec/batch\n", "Epoch: 9/20... Training Step: 3756... Training loss: 0.9102... 0.1187 sec/batch\n", "Epoch: 9/20... Training Step: 3757... Training loss: 1.1285... 0.1183 sec/batch\n", "Epoch: 9/20... Training Step: 3758... Training loss: 1.1115... 0.1141 sec/batch\n", "Epoch: 9/20... Training Step: 3759... Training loss: 1.2281... 0.1206 sec/batch\n", "Epoch: 9/20... Training Step: 3760... Training loss: 1.0665... 0.1166 sec/batch\n", "Epoch: 9/20... Training Step: 3761... Training loss: 1.0426... 0.1190 sec/batch\n", "Epoch: 9/20... Training Step: 3762... Training loss: 1.1176... 0.1146 sec/batch\n", "Epoch: 9/20... Training Step: 3763... Training loss: 1.1277... 0.1133 sec/batch\n", "Epoch: 9/20... Training Step: 3764... Training loss: 1.1510... 0.1164 sec/batch\n", "Epoch: 9/20... Training Step: 3765... Training loss: 1.1701... 0.1211 sec/batch\n", "Epoch: 9/20... Training Step: 3766... Training loss: 1.2572... 0.1157 sec/batch\n", "Epoch: 9/20... Training Step: 3767... Training loss: 1.0497... 0.1177 sec/batch\n", "Epoch: 9/20... Training Step: 3768... Training loss: 1.1780... 0.1191 sec/batch\n", "Epoch: 9/20... Training Step: 3769... Training loss: 1.1573... 0.1236 sec/batch\n", "Epoch: 9/20... Training Step: 3770... Training loss: 1.2053... 0.1181 sec/batch\n", "Epoch: 9/20... Training Step: 3771... Training loss: 1.0686... 0.1172 sec/batch\n", "Epoch: 9/20... Training Step: 3772... Training loss: 1.0564... 0.1165 sec/batch\n", "Epoch: 9/20... Training Step: 3773... 
Training loss: 1.1102... 0.1228 sec/batch\n", "Epoch: 9/20... Training Step: 3774... Training loss: 1.2381... 0.1123 sec/batch\n", "Epoch: 9/20... Training Step: 3775... Training loss: 1.1156... 0.1145 sec/batch\n", "Epoch: 9/20... Training Step: 3776... Training loss: 1.2140... 0.1143 sec/batch\n", "Epoch: 9/20... Training Step: 3777... Training loss: 1.0763... 0.1214 sec/batch\n", "Epoch: 9/20... Training Step: 3778... Training loss: 1.2878... 0.1166 sec/batch\n", "Epoch: 9/20... Training Step: 3779... Training loss: 1.1538... 0.1178 sec/batch\n", "Epoch: 9/20... Training Step: 3780... Training loss: 1.1438... 0.1218 sec/batch\n", "Epoch: 9/20... Training Step: 3781... Training loss: 1.1072... 0.1203 sec/batch\n", "Epoch: 9/20... Training Step: 3782... Training loss: 1.2298... 0.1172 sec/batch\n", "Epoch: 9/20... Training Step: 3783... Training loss: 1.2939... 0.1150 sec/batch\n", "Epoch: 9/20... Training Step: 3784... Training loss: 1.1040... 0.1198 sec/batch\n", "Epoch: 9/20... Training Step: 3785... Training loss: 1.2440... 0.1222 sec/batch\n", "Epoch: 9/20... Training Step: 3786... Training loss: 1.0282... 0.1161 sec/batch\n", "Epoch: 9/20... Training Step: 3787... Training loss: 1.3070... 0.1165 sec/batch\n", "Epoch: 9/20... Training Step: 3788... Training loss: 1.0301... 0.1167 sec/batch\n", "Epoch: 9/20... Training Step: 3789... Training loss: 1.0376... 0.1187 sec/batch\n", "Epoch: 9/20... Training Step: 3790... Training loss: 1.1478... 0.1178 sec/batch\n", "Epoch: 9/20... Training Step: 3791... Training loss: 1.1900... 0.1158 sec/batch\n", "Epoch: 9/20... Training Step: 3792... Training loss: 1.0514... 0.1167 sec/batch\n", "Epoch: 9/20... Training Step: 3793... Training loss: 1.3060... 0.1198 sec/batch\n", "Epoch: 9/20... Training Step: 3794... Training loss: 1.1302... 0.1146 sec/batch\n", "Epoch: 9/20... Training Step: 3795... Training loss: 0.9719... 0.1194 sec/batch\n", "Epoch: 9/20... Training Step: 3796... Training loss: 1.1993... 
0.1189 sec/batch\n", "Epoch: 9/20... Training Step: 3797... Training loss: 1.1943... 0.1199 sec/batch\n", "Epoch: 9/20... Training Step: 3798... Training loss: 1.3317... 0.1213 sec/batch\n", "Epoch: 9/20... Training Step: 3799... Training loss: 1.0516... 0.1131 sec/batch\n", "Epoch: 9/20... Training Step: 3800... Training loss: 1.2661... 0.1191 sec/batch\n", "Epoch: 9/20... Training Step: 3801... Training loss: 1.2984... 0.1341 sec/batch\n", "Epoch: 9/20... Training Step: 3802... Training loss: 1.0981... 0.1316 sec/batch\n", "Epoch: 9/20... Training Step: 3803... Training loss: 1.2408... 0.1398 sec/batch\n", "Epoch: 9/20... Training Step: 3804... Training loss: 1.3233... 0.1239 sec/batch\n", "Epoch: 9/20... Training Step: 3805... Training loss: 0.9964... 0.1304 sec/batch\n", "Epoch: 9/20... Training Step: 3806... Training loss: 1.3029... 0.1260 sec/batch\n", "Epoch: 9/20... Training Step: 3807... Training loss: 1.1359... 0.1312 sec/batch\n", "Epoch: 9/20... Training Step: 3808... Training loss: 1.1413... 0.1275 sec/batch\n", "Epoch: 9/20... Training Step: 3809... Training loss: 1.3998... 0.1333 sec/batch\n", "Epoch: 9/20... Training Step: 3810... Training loss: 1.3632... 0.1234 sec/batch\n", "Epoch: 9/20... Training Step: 3811... Training loss: 1.1958... 0.1310 sec/batch\n", "Epoch: 9/20... Training Step: 3812... Training loss: 1.1398... 0.1314 sec/batch\n", "Epoch: 9/20... Training Step: 3813... Training loss: 1.2688... 0.1347 sec/batch\n", "Epoch: 9/20... Training Step: 3814... Training loss: 1.3363... 0.1176 sec/batch\n", "Epoch: 9/20... Training Step: 3815... Training loss: 1.4669... 0.1337 sec/batch\n", "Epoch: 9/20... Training Step: 3816... Training loss: 1.1994... 0.1268 sec/batch\n", "Epoch: 9/20... Training Step: 3817... Training loss: 1.3930... 0.1323 sec/batch\n", "Epoch: 9/20... Training Step: 3818... Training loss: 1.3335... 0.1322 sec/batch\n", "Epoch: 9/20... Training Step: 3819... Training loss: 1.2424... 0.1179 sec/batch\n", "Epoch: 9/20... 
Training Step: 3820... Training loss: 1.2670... 0.1203 sec/batch\n", "Epoch: 9/20... Training Step: 3821... Training loss: 1.2071... 0.1310 sec/batch\n", "Epoch: 9/20... Training Step: 3822... Training loss: 1.0459... 0.1316 sec/batch\n", "Epoch: 9/20... Training Step: 3823... Training loss: 1.1453... 0.1358 sec/batch\n", "Epoch: 9/20... Training Step: 3824... Training loss: 1.0989... 0.1421 sec/batch\n", "Epoch: 9/20... Training Step: 3825... Training loss: 1.2646... 0.1347 sec/batch\n", "Epoch: 9/20... Training Step: 3826... Training loss: 1.3884... 0.1342 sec/batch\n", "Epoch: 9/20... Training Step: 3827... Training loss: 1.1303... 0.1341 sec/batch\n", "Epoch: 9/20... Training Step: 3828... Training loss: 1.1183... 0.1322 sec/batch\n", "Epoch: 9/20... Training Step: 3829... Training loss: 1.1555... 0.1309 sec/batch\n", "Epoch: 9/20... Training Step: 3830... Training loss: 1.1988... 0.1426 sec/batch\n", "Epoch: 9/20... Training Step: 3831... Training loss: 1.2079... 0.1288 sec/batch\n", "Epoch: 9/20... Training Step: 3832... Training loss: 1.0413... 0.1258 sec/batch\n", "Epoch: 9/20... Training Step: 3833... Training loss: 1.2583... 0.1195 sec/batch\n", "Epoch: 9/20... Training Step: 3834... Training loss: 1.2215... 0.1312 sec/batch\n", "Epoch: 9/20... Training Step: 3835... Training loss: 1.2448... 0.1259 sec/batch\n", "Epoch: 9/20... Training Step: 3836... Training loss: 1.1691... 0.1213 sec/batch\n", "Epoch: 9/20... Training Step: 3837... Training loss: 1.1772... 0.1385 sec/batch\n", "Epoch: 9/20... Training Step: 3838... Training loss: 1.0182... 0.1391 sec/batch\n", "Epoch: 9/20... Training Step: 3839... Training loss: 1.1551... 0.1302 sec/batch\n", "Epoch: 9/20... Training Step: 3840... Training loss: 1.3720... 0.1270 sec/batch\n", "Epoch: 9/20... Training Step: 3841... Training loss: 1.2571... 0.1287 sec/batch\n", "Epoch: 9/20... Training Step: 3842... Training loss: 1.1206... 0.1283 sec/batch\n", "Epoch: 9/20... Training Step: 3843... 
Training loss: 1.3158... 0.1292 sec/batch\n", "Epoch: 9/20... Training Step: 3844... Training loss: 1.1912... 0.1277 sec/batch\n", "Epoch: 9/20... Training Step: 3845... Training loss: 1.1410... 0.1296 sec/batch\n", "Epoch: 9/20... Training Step: 3846... Training loss: 1.4156... 0.1312 sec/batch\n", "Epoch: 9/20... Training Step: 3847... Training loss: 1.0270... 0.1299 sec/batch\n", "Epoch: 9/20... Training Step: 3848... Training loss: 0.9582... 0.1195 sec/batch\n", "Epoch: 9/20... Training Step: 3849... Training loss: 1.0672... 0.1227 sec/batch\n", "Epoch: 9/20... Training Step: 3850... Training loss: 1.1840... 0.1200 sec/batch\n", "Epoch: 9/20... Training Step: 3851... Training loss: 1.1181... 0.1338 sec/batch\n", "Epoch: 9/20... Training Step: 3852... Training loss: 1.2428... 0.1211 sec/batch\n", "Epoch: 9/20... Training Step: 3853... Training loss: 1.0695... 0.1171 sec/batch\n", "Epoch: 9/20... Training Step: 3854... Training loss: 1.0753... 0.1223 sec/batch\n", "Epoch: 9/20... Training Step: 3855... Training loss: 1.0133... 0.1280 sec/batch\n", "Epoch: 9/20... Training Step: 3856... Training loss: 1.2034... 0.1215 sec/batch\n", "Epoch: 9/20... Training Step: 3857... Training loss: 1.1841... 0.1135 sec/batch\n", "Epoch: 9/20... Training Step: 3858... Training loss: 1.1464... 0.1210 sec/batch\n", "Epoch: 9/20... Training Step: 3859... Training loss: 1.1337... 0.1314 sec/batch\n", "Epoch: 9/20... Training Step: 3860... Training loss: 1.0549... 0.1195 sec/batch\n", "Epoch: 9/20... Training Step: 3861... Training loss: 1.0795... 0.1243 sec/batch\n", "Epoch: 9/20... Training Step: 3862... Training loss: 1.4009... 0.1232 sec/batch\n", "Epoch: 9/20... Training Step: 3863... Training loss: 1.1578... 0.1417 sec/batch\n", "Epoch: 9/20... Training Step: 3864... Training loss: 1.2086... 0.1265 sec/batch\n", "Epoch: 9/20... Training Step: 3865... Training loss: 1.2938... 0.1335 sec/batch\n", "Epoch: 9/20... Training Step: 3866... Training loss: 1.1621... 
0.1250 sec/batch\n", "Epoch: 9/20... Training Step: 3867... Training loss: 1.2238... 0.1235 sec/batch\n", "Epoch: 9/20... Training Step: 3868... Training loss: 1.1627... 0.1143 sec/batch\n", "Epoch: 9/20... Training Step: 3869... Training loss: 1.0434... 0.1181 sec/batch\n", "Epoch: 9/20... Training Step: 3870... Training loss: 1.2588... 0.1169 sec/batch\n", "Epoch: 9/20... Training Step: 3871... Training loss: 0.9961... 0.1212 sec/batch\n", "Epoch: 9/20... Training Step: 3872... Training loss: 1.0530... 0.1159 sec/batch\n", "Epoch: 9/20... Training Step: 3873... Training loss: 1.3061... 0.1309 sec/batch\n", "Epoch: 9/20... Training Step: 3874... Training loss: 1.1153... 0.1298 sec/batch\n", "Epoch: 9/20... Training Step: 3875... Training loss: 1.3081... 0.1313 sec/batch\n", "Epoch: 9/20... Training Step: 3876... Training loss: 1.0099... 0.1303 sec/batch\n", "Epoch: 9/20... Training Step: 3877... Training loss: 1.2241... 0.1204 sec/batch\n", "Epoch: 9/20... Training Step: 3878... Training loss: 1.0746... 0.1218 sec/batch\n", "Epoch: 9/20... Training Step: 3879... Training loss: 1.0854... 0.1235 sec/batch\n", "Epoch: 9/20... Training Step: 3880... Training loss: 1.4502... 0.1279 sec/batch\n", "Epoch: 9/20... Training Step: 3881... Training loss: 1.1510... 0.1259 sec/batch\n", "Epoch: 9/20... Training Step: 3882... Training loss: 1.2109... 0.1257 sec/batch\n", "Epoch: 9/20... Training Step: 3883... Training loss: 1.2336... 0.1221 sec/batch\n", "Epoch: 9/20... Training Step: 3884... Training loss: 1.2690... 0.1159 sec/batch\n", "Epoch: 9/20... Training Step: 3885... Training loss: 0.9936... 0.1190 sec/batch\n", "Epoch: 9/20... Training Step: 3886... Training loss: 1.2161... 0.1295 sec/batch\n", "Epoch: 9/20... Training Step: 3887... Training loss: 1.2669... 0.1153 sec/batch\n", "Epoch: 9/20... Training Step: 3888... Training loss: 1.0813... 0.1314 sec/batch\n", "Epoch: 9/20... Training Step: 3889... Training loss: 1.0671... 0.1246 sec/batch\n", "Epoch: 9/20... 
Training Step: 3890... Training loss: 1.2827... 0.1267 sec/batch\n", "Epoch: 9/20... Training Step: 3891... Training loss: 1.0078... 0.1320 sec/batch\n", "Epoch: 9/20... Training Step: 3892... Training loss: 1.2362... 0.1163 sec/batch\n", "Epoch: 9/20... Training Step: 3893... Training loss: 1.0349... 0.1275 sec/batch\n", "Epoch: 9/20... Training Step: 3894... Training loss: 1.2117... 0.1288 sec/batch\n", "Epoch: 9/20... Training Step: 3895... Training loss: 1.3236... 0.1272 sec/batch\n", "Epoch: 9/20... Training Step: 3896... Training loss: 1.2329... 0.1274 sec/batch\n", "Epoch: 9/20... Training Step: 3897... Training loss: 1.2642... 0.1353 sec/batch\n", "Epoch: 9/20... Training Step: 3898... Training loss: 1.2242... 0.1250 sec/batch\n", "Epoch: 9/20... Training Step: 3899... Training loss: 1.2828... 0.1202 sec/batch\n", "Epoch: 9/20... Training Step: 3900... Training loss: 1.1515... 0.1188 sec/batch\n", "Epoch: 9/20... Training Step: 3901... Training loss: 1.3210... 0.1207 sec/batch\n", "Epoch: 9/20... Training Step: 3902... Training loss: 1.1601... 0.1314 sec/batch\n", "Epoch: 9/20... Training Step: 3903... Training loss: 1.0494... 0.1289 sec/batch\n", "Epoch: 9/20... Training Step: 3904... Training loss: 1.2767... 0.1201 sec/batch\n", "Epoch: 9/20... Training Step: 3905... Training loss: 1.1415... 0.1268 sec/batch\n", "Epoch: 9/20... Training Step: 3906... Training loss: 1.2722... 0.1244 sec/batch\n", "Epoch: 9/20... Training Step: 3907... Training loss: 1.2561... 0.1289 sec/batch\n", "Epoch: 9/20... Training Step: 3908... Training loss: 1.2410... 0.1237 sec/batch\n", "Epoch: 9/20... Training Step: 3909... Training loss: 1.1165... 0.1302 sec/batch\n", "Epoch: 9/20... Training Step: 3910... Training loss: 1.2457... 0.1215 sec/batch\n", "Epoch: 9/20... Training Step: 3911... Training loss: 0.9192... 0.1265 sec/batch\n", "Epoch: 9/20... Training Step: 3912... Training loss: 1.2001... 0.1278 sec/batch\n", "Epoch: 9/20... Training Step: 3913... 
Training loss: 1.1458... 0.1390 sec/batch\n", "Epoch: 9/20... Training Step: 3914... Training loss: 1.1605... 0.1344 sec/batch\n", "Epoch: 9/20... Training Step: 3915... Training loss: 1.1611... 0.1279 sec/batch\n", "Epoch: 9/20... Training Step: 3916... Training loss: 1.3208... 0.1329 sec/batch\n", "Epoch: 9/20... Training Step: 3917... Training loss: 1.0593... 0.1259 sec/batch\n", "Epoch: 9/20... Training Step: 3918... Training loss: 1.1644... 0.1257 sec/batch\n", "Epoch: 9/20... Training Step: 3919... Training loss: 1.0960... 0.1139 sec/batch\n", "Epoch: 9/20... Training Step: 3920... Training loss: 1.2383... 0.1248 sec/batch\n", "Epoch: 9/20... Training Step: 3921... Training loss: 1.1366... 0.1199 sec/batch\n", "Epoch: 9/20... Training Step: 3922... Training loss: 1.0142... 0.1248 sec/batch\n", "Epoch: 9/20... Training Step: 3923... Training loss: 1.0302... 0.1314 sec/batch\n", "Epoch: 9/20... Training Step: 3924... Training loss: 1.2199... 0.1280 sec/batch\n", "Epoch: 9/20... Training Step: 3925... Training loss: 1.3143... 0.1184 sec/batch\n", "Epoch: 9/20... Training Step: 3926... Training loss: 1.0695... 0.1178 sec/batch\n", "Epoch: 9/20... Training Step: 3927... Training loss: 1.3217... 0.1234 sec/batch\n", "Epoch: 9/20... Training Step: 3928... Training loss: 1.1344... 0.1255 sec/batch\n", "Epoch: 9/20... Training Step: 3929... Training loss: 1.2234... 0.1192 sec/batch\n", "Epoch: 9/20... Training Step: 3930... Training loss: 1.1301... 0.1193 sec/batch\n", "Epoch: 9/20... Training Step: 3931... Training loss: 1.3356... 0.1171 sec/batch\n", "Epoch: 9/20... Training Step: 3932... Training loss: 1.1714... 0.1158 sec/batch\n", "Epoch: 9/20... Training Step: 3933... Training loss: 1.1798... 0.1183 sec/batch\n", "Epoch: 9/20... Training Step: 3934... Training loss: 1.4757... 0.1187 sec/batch\n", "Epoch: 9/20... Training Step: 3935... Training loss: 1.2272... 0.1156 sec/batch\n", "Epoch: 9/20... Training Step: 3936... Training loss: 1.3206... 
0.1194 sec/batch\n", "Epoch: 9/20... Training Step: 3937... Training loss: 1.1877... 0.1483 sec/batch\n", "Epoch: 9/20... Training Step: 3938... Training loss: 1.3636... 0.1239 sec/batch\n", "Epoch: 9/20... Training Step: 3939... Training loss: 1.4350... 0.1269 sec/batch\n", "Epoch: 9/20... Training Step: 3940... Training loss: 1.1233... 0.1291 sec/batch\n", "Epoch: 9/20... Training Step: 3941... Training loss: 1.2520... 0.1284 sec/batch\n", "Epoch: 9/20... Training Step: 3942... Training loss: 1.1451... 0.1252 sec/batch\n", "Epoch: 9/20... Training Step: 3943... Training loss: 1.3654... 0.1242 sec/batch\n", "Epoch: 9/20... Training Step: 3944... Training loss: 1.1550... 0.1200 sec/batch\n", "Epoch: 9/20... Training Step: 3945... Training loss: 1.4274... 0.1149 sec/batch\n", "Epoch: 9/20... Training Step: 3946... Training loss: 1.1491... 0.1196 sec/batch\n", "Epoch: 9/20... Training Step: 3947... Training loss: 1.4354... 0.1180 sec/batch\n", "Epoch: 9/20... Training Step: 3948... Training loss: 1.1841... 0.1209 sec/batch\n", "Epoch: 9/20... Training Step: 3949... Training loss: 1.3754... 0.1175 sec/batch\n", "Epoch: 9/20... Training Step: 3950... Training loss: 1.1041... 0.1124 sec/batch\n", "Epoch: 9/20... Training Step: 3951... Training loss: 1.2725... 0.1197 sec/batch\n", "Epoch: 9/20... Training Step: 3952... Training loss: 1.2727... 0.1170 sec/batch\n", "Epoch: 9/20... Training Step: 3953... Training loss: 1.1806... 0.1172 sec/batch\n", "Epoch: 9/20... Training Step: 3954... Training loss: 1.1604... 0.1166 sec/batch\n", "Epoch: 9/20... Training Step: 3955... Training loss: 1.2014... 0.1191 sec/batch\n", "Epoch: 9/20... Training Step: 3956... Training loss: 1.2062... 0.1165 sec/batch\n", "Epoch: 9/20... Training Step: 3957... Training loss: 1.2627... 0.1201 sec/batch\n", "Epoch: 9/20... Training Step: 3958... Training loss: 1.0833... 0.1244 sec/batch\n", "Epoch: 9/20... Training Step: 3959... Training loss: 1.0972... 0.1273 sec/batch\n", "Epoch: 9/20... 
Training Step: 3960... Training loss: 1.3095... 0.1316 sec/batch\n", "Epoch: 9/20... Training Step: 3961... Training loss: 1.1242... 0.1246 sec/batch\n", "Epoch: 9/20... Training Step: 3962... Training loss: 1.2725... 0.1283 sec/batch\n", "Epoch: 9/20... Training Step: 3963... Training loss: 1.2592... 0.1301 sec/batch\n", "Epoch: 9/20... Training Step: 3964... Training loss: 1.1886... 0.1351 sec/batch\n", "Epoch: 9/20... Training Step: 3965... Training loss: 1.1089... 0.1375 sec/batch\n", "Epoch: 9/20... Training Step: 3966... Training loss: 1.1956... 0.1314 sec/batch\n", "Epoch: 9/20... Training Step: 3967... Training loss: 1.1517... 0.1309 sec/batch\n", "Epoch: 9/20... Training Step: 3968... Training loss: 1.1358... 0.1332 sec/batch\n", "Epoch: 9/20... Training Step: 3969... Training loss: 1.4692... 0.1351 sec/batch\n", "Epoch: 9/20... Training Step: 3970... Training loss: 1.0327... 0.1335 sec/batch\n", "Epoch: 9/20... Training Step: 3971... Training loss: 1.1087... 0.1218 sec/batch\n", "Epoch: 9/20... Training Step: 3972... Training loss: 1.0598... 0.1253 sec/batch\n", "Epoch: 9/20... Training Step: 3973... Training loss: 1.2052... 0.1303 sec/batch\n", "Epoch: 9/20... Training Step: 3974... Training loss: 1.2612... 0.1245 sec/batch\n", "Epoch: 9/20... Training Step: 3975... Training loss: 1.1524... 0.1229 sec/batch\n", "Epoch: 9/20... Training Step: 3976... Training loss: 1.2045... 0.1274 sec/batch\n", "Epoch: 9/20... Training Step: 3977... Training loss: 1.2814... 0.1277 sec/batch\n", "Epoch: 9/20... Training Step: 3978... Training loss: 1.2965... 0.1314 sec/batch\n", "Epoch: 9/20... Training Step: 3979... Training loss: 1.4474... 0.1273 sec/batch\n", "Epoch: 9/20... Training Step: 3980... Training loss: 1.3096... 0.1202 sec/batch\n", "Epoch: 9/20... Training Step: 3981... Training loss: 1.2294... 0.1135 sec/batch\n", "Epoch: 9/20... Training Step: 3982... Training loss: 1.3974... 0.1149 sec/batch\n", "Epoch: 9/20... Training Step: 3983... 
Training loss: 1.3126... 0.1180 sec/batch\n", "Epoch: 9/20... Training Step: 3984... Training loss: 1.3374... 0.1260 sec/batch\n", "Epoch: 9/20... Training Step: 3985... Training loss: 1.3046... 0.1347 sec/batch\n", "Epoch: 9/20... Training Step: 3986... Training loss: 1.2181... 0.1285 sec/batch\n", "Epoch: 9/20... Training Step: 3987... Training loss: 1.3277... 0.1259 sec/batch\n", "Epoch: 9/20... Training Step: 3988... Training loss: 1.0947... 0.1309 sec/batch\n", "Epoch: 9/20... Training Step: 3989... Training loss: 1.1564... 0.1239 sec/batch\n", "Epoch: 9/20... Training Step: 3990... Training loss: 1.3555... 0.1228 sec/batch\n", "Epoch: 9/20... Training Step: 3991... Training loss: 1.1499... 0.1308 sec/batch\n", "Epoch: 9/20... Training Step: 3992... Training loss: 1.2126... 0.1283 sec/batch\n", "Epoch: 9/20... Training Step: 3993... Training loss: 1.1097... 0.1471 sec/batch\n", "Epoch: 9/20... Training Step: 3994... Training loss: 1.2027... 0.1399 sec/batch\n", "Epoch: 9/20... Training Step: 3995... Training loss: 1.0800... 0.1305 sec/batch\n", "Epoch: 9/20... Training Step: 3996... Training loss: 1.2544... 0.1428 sec/batch\n", "Epoch: 9/20... Training Step: 3997... Training loss: 1.0967... 0.1328 sec/batch\n", "Epoch: 9/20... Training Step: 3998... Training loss: 1.2055... 0.1287 sec/batch\n", "Epoch: 9/20... Training Step: 3999... Training loss: 1.2410... 0.1389 sec/batch\n", "Epoch: 9/20... Training Step: 4000... Training loss: 1.3239... 0.1429 sec/batch\n", "Epoch: 9/20... Training Step: 4001... Training loss: 1.2892... 0.1188 sec/batch\n", "Epoch: 9/20... Training Step: 4002... Training loss: 1.2924... 0.1138 sec/batch\n", "Epoch: 9/20... Training Step: 4003... Training loss: 1.1903... 0.1208 sec/batch\n", "Epoch: 9/20... Training Step: 4004... Training loss: 1.0960... 0.1248 sec/batch\n", "Epoch: 9/20... Training Step: 4005... Training loss: 1.1214... 0.1221 sec/batch\n", "Epoch: 9/20... Training Step: 4006... Training loss: 1.1866... 
0.1257 sec/batch\n", "Epoch: 9/20... Training Step: 4007... Training loss: 1.2709... 0.1147 sec/batch\n", "Epoch: 9/20... Training Step: 4008... Training loss: 1.5377... 0.1157 sec/batch\n", "Epoch: 9/20... Training Step: 4009... Training loss: 1.1470... 0.1174 sec/batch\n", "Epoch: 9/20... Training Step: 4010... Training loss: 1.1955... 0.1251 sec/batch\n", "Epoch: 9/20... Training Step: 4011... Training loss: 1.1117... 0.1261 sec/batch\n", "Epoch: 9/20... Training Step: 4012... Training loss: 1.2144... 0.1316 sec/batch\n", "Epoch: 9/20... Training Step: 4013... Training loss: 1.1689... 0.1246 sec/batch\n", "Epoch: 9/20... Training Step: 4014... Training loss: 1.2033... 0.1256 sec/batch\n", "Epoch: 9/20... Training Step: 4015... Training loss: 0.9384... 0.1258 sec/batch\n", "Epoch: 9/20... Training Step: 4016... Training loss: 1.3535... 0.1288 sec/batch\n", "Epoch: 9/20... Training Step: 4017... Training loss: 1.0835... 0.1284 sec/batch\n", "Epoch: 9/20... Training Step: 4018... Training loss: 1.1843... 0.1267 sec/batch\n", "Epoch: 9/20... Training Step: 4019... Training loss: 1.2152... 0.1306 sec/batch\n", "Epoch: 9/20... Training Step: 4020... Training loss: 1.5008... 0.1229 sec/batch\n", "Epoch: 9/20... Training Step: 4021... Training loss: 1.2543... 0.1341 sec/batch\n", "Epoch: 9/20... Training Step: 4022... Training loss: 1.3007... 0.1277 sec/batch\n", "Epoch: 9/20... Training Step: 4023... Training loss: 1.1771... 0.1170 sec/batch\n", "Epoch: 9/20... Training Step: 4024... Training loss: 1.1595... 0.1199 sec/batch\n", "Epoch: 9/20... Training Step: 4025... Training loss: 1.1253... 0.1258 sec/batch\n", "Epoch: 9/20... Training Step: 4026... Training loss: 1.1427... 0.1306 sec/batch\n", "Epoch: 9/20... Training Step: 4027... Training loss: 0.9541... 0.1280 sec/batch\n", "Epoch: 9/20... Training Step: 4028... Training loss: 1.0082... 0.1393 sec/batch\n", "Epoch: 9/20... Training Step: 4029... Training loss: 1.1587... 0.1402 sec/batch\n", "Epoch: 9/20... 
Training Step: 4030... Training loss: 1.0303... 0.1262 sec/batch\n", "Epoch: 9/20... Training Step: 4031... Training loss: 1.0602... 0.1254 sec/batch\n", "Epoch: 9/20... Training Step: 4032... Training loss: 1.0453... 0.1129 sec/batch\n", "Epoch: 9/20... Training Step: 4033... Training loss: 1.0932... 0.1146 sec/batch\n", "Epoch: 9/20... Training Step: 4034... Training loss: 1.3645... 0.1263 sec/batch\n", "Epoch: 9/20... Training Step: 4035... Training loss: 1.0722... 0.1265 sec/batch\n", "Epoch: 9/20... Training Step: 4036... Training loss: 0.9849... 0.1279 sec/batch\n", "Epoch: 9/20... Training Step: 4037... Training loss: 1.0758... 0.1222 sec/batch\n", "Epoch: 9/20... Training Step: 4038... Training loss: 1.0089... 0.1216 sec/batch\n", "Epoch: 9/20... Training Step: 4039... Training loss: 1.1203... 0.1196 sec/batch\n", "Epoch: 9/20... Training Step: 4040... Training loss: 1.0915... 0.1174 sec/batch\n", "Epoch: 9/20... Training Step: 4041... Training loss: 1.2433... 0.1194 sec/batch\n", "Epoch: 9/20... Training Step: 4042... Training loss: 1.1655... 0.1175 sec/batch\n", "Epoch: 9/20... Training Step: 4043... Training loss: 1.1939... 0.1162 sec/batch\n", "Epoch: 9/20... Training Step: 4044... Training loss: 1.1152... 0.1207 sec/batch\n", "Epoch: 9/20... Training Step: 4045... Training loss: 1.1523... 0.1275 sec/batch\n", "Epoch: 9/20... Training Step: 4046... Training loss: 1.0753... 0.1216 sec/batch\n", "Epoch: 9/20... Training Step: 4047... Training loss: 1.1885... 0.1239 sec/batch\n", "Epoch: 9/20... Training Step: 4048... Training loss: 1.1437... 0.1265 sec/batch\n", "Epoch: 9/20... Training Step: 4049... Training loss: 1.2248... 0.1298 sec/batch\n", "Epoch: 9/20... Training Step: 4050... Training loss: 1.0015... 0.1242 sec/batch\n", "Epoch: 9/20... Training Step: 4051... Training loss: 1.2726... 0.1254 sec/batch\n", "Epoch: 9/20... Training Step: 4052... Training loss: 1.0893... 0.1185 sec/batch\n", "Epoch: 9/20... Training Step: 4053... 
Training loss: 1.0452... 0.1299 sec/batch\n", "Epoch: 9/20... Training Step: 4054... Training loss: 1.1615... 0.1172 sec/batch\n", "Epoch: 9/20... Training Step: 4055... Training loss: 1.0424... 0.1181 sec/batch\n", "Epoch: 9/20... Training Step: 4056... Training loss: 1.4045... 0.1177 sec/batch\n", "Epoch: 9/20... Training Step: 4057... Training loss: 1.0259... 0.1143 sec/batch\n", "Epoch: 9/20... Training Step: 4058... Training loss: 0.9632... 0.1202 sec/batch\n", "Epoch: 9/20... Training Step: 4059... Training loss: 0.9937... 0.1216 sec/batch\n", "Epoch: 9/20... Training Step: 4060... Training loss: 1.4693... 0.1215 sec/batch\n", "Epoch: 9/20... Training Step: 4061... Training loss: 0.9853... 0.1124 sec/batch\n", "Epoch: 9/20... Training Step: 4062... Training loss: 1.2074... 0.1253 sec/batch\n", "Epoch: 9/20... Training Step: 4063... Training loss: 1.1137... 0.1240 sec/batch\n", "Epoch: 9/20... Training Step: 4064... Training loss: 1.1248... 0.1257 sec/batch\n", "Epoch: 9/20... Training Step: 4065... Training loss: 1.0584... 0.1261 sec/batch\n", "Epoch: 9/20... Training Step: 4066... Training loss: 0.8470... 0.1239 sec/batch\n", "Epoch: 9/20... Training Step: 4067... Training loss: 1.2456... 0.1284 sec/batch\n", "Epoch: 9/20... Training Step: 4068... Training loss: 1.1321... 0.1239 sec/batch\n", "Epoch: 9/20... Training Step: 4069... Training loss: 1.1332... 0.1297 sec/batch\n", "Epoch: 9/20... Training Step: 4070... Training loss: 1.3777... 0.1235 sec/batch\n", "Epoch: 9/20... Training Step: 4071... Training loss: 1.3316... 0.1225 sec/batch\n", "Epoch: 9/20... Training Step: 4072... Training loss: 1.0148... 0.1251 sec/batch\n", "Epoch: 9/20... Training Step: 4073... Training loss: 1.2889... 0.1264 sec/batch\n", "Epoch: 9/20... Training Step: 4074... Training loss: 1.2710... 0.1227 sec/batch\n", "Epoch: 9/20... Training Step: 4075... Training loss: 0.9636... 0.1207 sec/batch\n", "Epoch: 9/20... Training Step: 4076... Training loss: 1.1778... 
0.1205 sec/batch\n", "Epoch: 9/20... Training Step: 4077... Training loss: 1.1487... 0.1274 sec/batch\n", "Epoch: 9/20... Training Step: 4078... Training loss: 1.2552... 0.1306 sec/batch\n", "Epoch: 9/20... Training Step: 4079... Training loss: 1.1047... 0.1289 sec/batch\n", "Epoch: 9/20... Training Step: 4080... Training loss: 1.2616... 0.1309 sec/batch\n", "Epoch: 9/20... Training Step: 4081... Training loss: 1.1982... 0.1198 sec/batch\n", "Epoch: 9/20... Training Step: 4082... Training loss: 1.1430... 0.1231 sec/batch\n", "Epoch: 9/20... Training Step: 4083... Training loss: 1.0058... 0.1249 sec/batch\n", "Epoch: 9/20... Training Step: 4084... Training loss: 1.0760... 0.1290 sec/batch\n", "Epoch: 9/20... Training Step: 4085... Training loss: 0.9818... 0.1256 sec/batch\n", "Epoch: 9/20... Training Step: 4086... Training loss: 1.2802... 0.1239 sec/batch\n", "Epoch: 9/20... Training Step: 4087... Training loss: 1.2483... 0.1280 sec/batch\n", "Epoch: 9/20... Training Step: 4088... Training loss: 1.2787... 0.1259 sec/batch\n", "Epoch: 9/20... Training Step: 4089... Training loss: 1.3062... 0.1217 sec/batch\n", "Epoch: 9/20... Training Step: 4090... Training loss: 1.2343... 0.1239 sec/batch\n", "Epoch: 9/20... Training Step: 4091... Training loss: 1.0578... 0.1263 sec/batch\n", "Epoch: 9/20... Training Step: 4092... Training loss: 1.2267... 0.1229 sec/batch\n", "Epoch: 9/20... Training Step: 4093... Training loss: 1.0127... 0.1171 sec/batch\n", "Epoch: 9/20... Training Step: 4094... Training loss: 1.0636... 0.1263 sec/batch\n", "Epoch: 9/20... Training Step: 4095... Training loss: 0.9899... 0.1276 sec/batch\n", "Epoch: 9/20... Training Step: 4096... Training loss: 1.1551... 0.1282 sec/batch\n", "Epoch: 9/20... Training Step: 4097... Training loss: 1.1092... 0.1253 sec/batch\n", "Epoch: 9/20... Training Step: 4098... Training loss: 1.1581... 0.1242 sec/batch\n", "Epoch: 9/20... Training Step: 4099... Training loss: 1.1301... 0.1316 sec/batch\n", "Epoch: 9/20... 
Training Step: 4100... Training loss: 0.9596... 0.1241 sec/batch\n", "Epoch: 9/20... Training Step: 4101... Training loss: 0.9633... 0.1344 sec/batch\n", "Epoch: 9/20... Training Step: 4102... Training loss: 1.1678... 0.1302 sec/batch\n", "Epoch: 9/20... Training Step: 4103... Training loss: 1.0343... 0.1274 sec/batch\n", "Epoch: 9/20... Training Step: 4104... Training loss: 1.0770... 0.1214 sec/batch\n", "Epoch: 9/20... Training Step: 4105... Training loss: 1.1124... 0.1271 sec/batch\n", "Epoch: 9/20... Training Step: 4106... Training loss: 1.1495... 0.1298 sec/batch\n", "Epoch: 9/20... Training Step: 4107... Training loss: 1.0969... 0.1384 sec/batch\n", "Epoch: 9/20... Training Step: 4108... Training loss: 1.2966... 0.1331 sec/batch\n", "Epoch: 9/20... Training Step: 4109... Training loss: 1.1019... 0.1315 sec/batch\n", "Epoch: 9/20... Training Step: 4110... Training loss: 1.0561... 0.1270 sec/batch\n", "Epoch: 9/20... Training Step: 4111... Training loss: 1.1790... 0.1321 sec/batch\n", "Epoch: 9/20... Training Step: 4112... Training loss: 1.1200... 0.1266 sec/batch\n", "Epoch: 9/20... Training Step: 4113... Training loss: 1.0765... 0.1349 sec/batch\n", "Epoch: 9/20... Training Step: 4114... Training loss: 1.1980... 0.1308 sec/batch\n", "Epoch: 9/20... Training Step: 4115... Training loss: 0.9648... 0.1306 sec/batch\n", "Epoch: 9/20... Training Step: 4116... Training loss: 1.2153... 0.1256 sec/batch\n", "Epoch: 9/20... Training Step: 4117... Training loss: 1.1668... 0.1291 sec/batch\n", "Epoch: 9/20... Training Step: 4118... Training loss: 1.2990... 0.1274 sec/batch\n", "Epoch: 9/20... Training Step: 4119... Training loss: 1.0973... 0.1289 sec/batch\n", "Epoch: 9/20... Training Step: 4120... Training loss: 1.4914... 0.1265 sec/batch\n", "Epoch: 9/20... Training Step: 4121... Training loss: 1.1640... 0.1263 sec/batch\n", "Epoch: 9/20... Training Step: 4122... Training loss: 1.1291... 0.1288 sec/batch\n", "Epoch: 9/20... Training Step: 4123... 
Training loss: 1.0696... 0.1305 sec/batch\n", "Epoch: 9/20... Training Step: 4124... Training loss: 1.0987... 0.1240 sec/batch\n", "Epoch: 9/20... Training Step: 4125... Training loss: 1.2570... 0.1285 sec/batch\n", "Epoch: 9/20... Training Step: 4126... Training loss: 1.2944... 0.1233 sec/batch\n", "Epoch: 9/20... Training Step: 4127... Training loss: 1.3613... 0.1226 sec/batch\n", "Epoch: 9/20... Training Step: 4128... Training loss: 1.4389... 0.1282 sec/batch\n", "Epoch: 9/20... Training Step: 4129... Training loss: 1.2639... 0.1182 sec/batch\n", "Epoch: 9/20... Training Step: 4130... Training loss: 1.0235... 0.1256 sec/batch\n", "Epoch: 9/20... Training Step: 4131... Training loss: 1.2217... 0.1287 sec/batch\n", "Epoch: 9/20... Training Step: 4132... Training loss: 1.0099... 0.1279 sec/batch\n", "Epoch: 9/20... Training Step: 4133... Training loss: 1.2878... 0.1200 sec/batch\n", "Epoch: 9/20... Training Step: 4134... Training loss: 1.2617... 0.1277 sec/batch\n", "Epoch: 9/20... Training Step: 4135... Training loss: 1.1982... 0.1236 sec/batch\n", "Epoch: 9/20... Training Step: 4136... Training loss: 1.3560... 0.1255 sec/batch\n", "Epoch: 9/20... Training Step: 4137... Training loss: 1.0892... 0.1286 sec/batch\n", "Epoch: 9/20... Training Step: 4138... Training loss: 1.1437... 0.1234 sec/batch\n", "Epoch: 9/20... Training Step: 4139... Training loss: 1.2856... 0.1152 sec/batch\n", "Epoch: 9/20... Training Step: 4140... Training loss: 1.1652... 0.1137 sec/batch\n", "Epoch: 9/20... Training Step: 4141... Training loss: 1.0497... 0.1234 sec/batch\n", "Epoch: 9/20... Training Step: 4142... Training loss: 1.1332... 0.1409 sec/batch\n", "Epoch: 9/20... Training Step: 4143... Training loss: 1.3548... 0.1249 sec/batch\n", "Epoch: 9/20... Training Step: 4144... Training loss: 1.0759... 0.1264 sec/batch\n", "Epoch: 9/20... Training Step: 4145... Training loss: 1.3749... 0.1290 sec/batch\n", "Epoch: 9/20... Training Step: 4146... Training loss: 1.3487... 
0.1269 sec/batch\n", "Epoch: 9/20... Training Step: 4147... Training loss: 1.0991... 0.1261 sec/batch\n", "Epoch: 9/20... Training Step: 4148... Training loss: 1.2306... 0.1252 sec/batch\n", "Epoch: 9/20... Training Step: 4149... Training loss: 1.1169... 0.1240 sec/batch\n", "Epoch: 9/20... Training Step: 4150... Training loss: 1.2973... 0.1283 sec/batch\n", "Epoch: 9/20... Training Step: 4151... Training loss: 1.4494... 0.1304 sec/batch\n", "Epoch: 9/20... Training Step: 4152... Training loss: 1.5044... 0.1265 sec/batch\n", "Epoch: 9/20... Training Step: 4153... Training loss: 1.2041... 0.1281 sec/batch\n", "Epoch: 9/20... Training Step: 4154... Training loss: 1.1673... 0.1269 sec/batch\n", "Epoch: 9/20... Training Step: 4155... Training loss: 1.3387... 0.1257 sec/batch\n", "Epoch: 9/20... Training Step: 4156... Training loss: 1.1266... 0.1258 sec/batch\n", "Epoch: 9/20... Training Step: 4157... Training loss: 1.2123... 0.1238 sec/batch\n", "Epoch: 9/20... Training Step: 4158... Training loss: 1.1009... 0.1260 sec/batch\n", "Epoch: 9/20... Training Step: 4159... Training loss: 1.2821... 0.1286 sec/batch\n", "Epoch: 9/20... Training Step: 4160... Training loss: 1.0411... 0.1303 sec/batch\n", "Epoch: 9/20... Training Step: 4161... Training loss: 1.2089... 0.1272 sec/batch\n", "Epoch: 9/20... Training Step: 4162... Training loss: 1.1728... 0.1201 sec/batch\n", "Epoch: 9/20... Training Step: 4163... Training loss: 1.0922... 0.1321 sec/batch\n", "Epoch: 9/20... Training Step: 4164... Training loss: 1.2159... 0.1192 sec/batch\n", "Epoch: 9/20... Training Step: 4165... Training loss: 1.1454... 0.1219 sec/batch\n", "Epoch: 9/20... Training Step: 4166... Training loss: 1.5105... 0.1274 sec/batch\n", "Epoch: 9/20... Training Step: 4167... Training loss: 1.1704... 0.1215 sec/batch\n", "Epoch: 9/20... Training Step: 4168... Training loss: 1.0189... 0.1283 sec/batch\n", "Epoch: 9/20... Training Step: 4169... Training loss: 1.1923... 0.1291 sec/batch\n", "Epoch: 9/20... 
Training Step: 4170... Training loss: 0.9818... 0.1288 sec/batch\n", "Epoch: 9/20... Training Step: 4171... Training loss: 1.0383... 0.1242 sec/batch\n", "Epoch: 9/20... Training Step: 4172... Training loss: 1.2189... 0.1352 sec/batch\n", "Epoch: 9/20... Training Step: 4173... Training loss: 1.2283... 0.1277 sec/batch\n", "Epoch: 9/20... Training Step: 4174... Training loss: 1.1110... 0.1258 sec/batch\n", "Epoch: 9/20... Training Step: 4175... Training loss: 1.1709... 0.1247 sec/batch\n", "Epoch: 9/20... Training Step: 4176... Training loss: 1.1041... 0.1259 sec/batch\n", "Epoch: 10/20... Training Step: 4177... Training loss: 1.4897... 0.1271 sec/batch\n", "Epoch: 10/20... Training Step: 4178... Training loss: 1.3029... 0.1230 sec/batch\n", "Epoch: 10/20... Training Step: 4179... Training loss: 1.2326... 0.1225 sec/batch\n", "Epoch: 10/20... Training Step: 4180... Training loss: 1.1466... 0.1249 sec/batch\n", "Epoch: 10/20... Training Step: 4181... Training loss: 1.2613... 0.1247 sec/batch\n", "Epoch: 10/20... Training Step: 4182... Training loss: 0.9899... 0.1215 sec/batch\n", "Epoch: 10/20... Training Step: 4183... Training loss: 1.3281... 0.1228 sec/batch\n", "Epoch: 10/20... Training Step: 4184... Training loss: 1.0899... 0.1228 sec/batch\n", "Epoch: 10/20... Training Step: 4185... Training loss: 0.9513... 0.1291 sec/batch\n", "Epoch: 10/20... Training Step: 4186... Training loss: 1.1952... 0.1224 sec/batch\n", "Epoch: 10/20... Training Step: 4187... Training loss: 1.1751... 0.1340 sec/batch\n", "Epoch: 10/20... Training Step: 4188... Training loss: 0.9337... 0.1248 sec/batch\n", "Epoch: 10/20... Training Step: 4189... Training loss: 1.2610... 0.1250 sec/batch\n", "Epoch: 10/20... Training Step: 4190... Training loss: 0.9347... 0.1313 sec/batch\n", "Epoch: 10/20... Training Step: 4191... Training loss: 1.1948... 0.1248 sec/batch\n", "Epoch: 10/20... Training Step: 4192... Training loss: 1.2187... 0.1253 sec/batch\n", "Epoch: 10/20... Training Step: 4193... 
Training loss: 1.1291... 0.1322 sec/batch\n", "Epoch: 10/20... Training Step: 4194... Training loss: 1.0373... 0.1194 sec/batch\n", "Epoch: 10/20... Training Step: 4195... Training loss: 1.2021... 0.1244 sec/batch\n", "Epoch: 10/20... Training Step: 4196... Training loss: 1.0496... 0.1258 sec/batch\n", "Epoch: 10/20... Training Step: 4197... Training loss: 1.2756... 0.1135 sec/batch\n", "Epoch: 10/20... Training Step: 4198... Training loss: 1.1040... 0.1172 sec/batch\n", "Epoch: 10/20... Training Step: 4199... Training loss: 1.1884... 0.1255 sec/batch\n", "Epoch: 10/20... Training Step: 4200... Training loss: 1.0920... 0.1208 sec/batch\n", "Epoch: 10/20... Training Step: 4201... Training loss: 1.1449... 0.1260 sec/batch\n", "Epoch: 10/20... Training Step: 4202... Training loss: 1.1218... 0.1211 sec/batch\n", "Epoch: 10/20... Training Step: 4203... Training loss: 1.1524... 0.1202 sec/batch\n", "Epoch: 10/20... Training Step: 4204... Training loss: 0.9931... 0.1313 sec/batch\n", "Epoch: 10/20... Training Step: 4205... Training loss: 1.0479... 0.1202 sec/batch\n", "Epoch: 10/20... Training Step: 4206... Training loss: 1.2005... 0.1263 sec/batch\n", "Epoch: 10/20... Training Step: 4207... Training loss: 1.0116... 0.1325 sec/batch\n", "Epoch: 10/20... Training Step: 4208... Training loss: 1.0173... 0.1291 sec/batch\n", "Epoch: 10/20... Training Step: 4209... Training loss: 1.0208... 0.1301 sec/batch\n", "Epoch: 10/20... Training Step: 4210... Training loss: 0.9153... 0.1299 sec/batch\n", "Epoch: 10/20... Training Step: 4211... Training loss: 0.9842... 0.1251 sec/batch\n", "Epoch: 10/20... Training Step: 4212... Training loss: 1.0295... 0.1231 sec/batch\n", "Epoch: 10/20... Training Step: 4213... Training loss: 1.1670... 0.1288 sec/batch\n", "Epoch: 10/20... Training Step: 4214... Training loss: 1.0439... 0.1253 sec/batch\n", "Epoch: 10/20... Training Step: 4215... Training loss: 1.0147... 0.1246 sec/batch\n", "Epoch: 10/20... Training Step: 4216... 
Training loss: 1.4644... 0.1261 sec/batch\n", "Epoch: 10/20... Training Step: 4217... Training loss: 1.1416... 0.1275 sec/batch\n", "Epoch: 10/20... Training Step: 4218... Training loss: 1.1423... 0.1278 sec/batch\n", "Epoch: 10/20... Training Step: 4219... Training loss: 1.3705... 0.1302 sec/batch\n", "Epoch: 10/20... Training Step: 4220... Training loss: 0.9318... 0.1247 sec/batch\n", "Epoch: 10/20... Training Step: 4221... Training loss: 1.0997... 0.1217 sec/batch\n", "Epoch: 10/20... Training Step: 4222... Training loss: 1.0647... 0.1252 sec/batch\n", "Epoch: 10/20... Training Step: 4223... Training loss: 1.1137... 0.1231 sec/batch\n", "Epoch: 10/20... Training Step: 4224... Training loss: 1.0588... 0.1270 sec/batch\n", "Epoch: 10/20... Training Step: 4225... Training loss: 1.1267... 0.1254 sec/batch\n", "Epoch: 10/20... Training Step: 4226... Training loss: 1.1558... 0.1162 sec/batch\n", "Epoch: 10/20... Training Step: 4227... Training loss: 1.0758... 0.1149 sec/batch\n", "Epoch: 10/20... Training Step: 4228... Training loss: 1.1836... 0.1137 sec/batch\n", "Epoch: 10/20... Training Step: 4229... Training loss: 1.0702... 0.1143 sec/batch\n", "Epoch: 10/20... Training Step: 4230... Training loss: 1.0644... 0.1209 sec/batch\n", "Epoch: 10/20... Training Step: 4231... Training loss: 1.0265... 0.1222 sec/batch\n", "Epoch: 10/20... Training Step: 4232... Training loss: 1.0832... 0.1168 sec/batch\n", "Epoch: 10/20... Training Step: 4233... Training loss: 1.2093... 0.1128 sec/batch\n", "Epoch: 10/20... Training Step: 4234... Training loss: 1.2224... 0.1286 sec/batch\n", "Epoch: 10/20... Training Step: 4235... Training loss: 0.9000... 0.1242 sec/batch\n", "Epoch: 10/20... Training Step: 4236... Training loss: 1.0365... 0.1236 sec/batch\n", "Epoch: 10/20... Training Step: 4237... Training loss: 0.9875... 0.1198 sec/batch\n", "Epoch: 10/20... Training Step: 4238... Training loss: 1.2142... 0.1200 sec/batch\n", "Epoch: 10/20... Training Step: 4239... 
Training loss: 1.0867... 0.1296 sec/batch\n", "Epoch: 10/20... Training Step: 4240... Training loss: 1.2296... 0.1279 sec/batch\n", "Epoch: 10/20... Training Step: 4241... Training loss: 1.0848... 0.1213 sec/batch\n", "Epoch: 10/20... Training Step: 4242... Training loss: 1.2648... 0.1225 sec/batch\n", "Epoch: 10/20... Training Step: 4243... Training loss: 1.0764... 0.1338 sec/batch\n", "Epoch: 10/20... Training Step: 4244... Training loss: 1.1088... 0.1301 sec/batch\n", "Epoch: 10/20... Training Step: 4245... Training loss: 0.9594... 0.1322 sec/batch\n", "Epoch: 10/20... Training Step: 4246... Training loss: 1.1748... 0.1285 sec/batch\n", "Epoch: 10/20... Training Step: 4247... Training loss: 1.3319... 0.1325 sec/batch\n", "Epoch: 10/20... Training Step: 4248... Training loss: 0.9734... 0.1305 sec/batch\n", "Epoch: 10/20... Training Step: 4249... Training loss: 1.0947... 0.1309 sec/batch\n", "Epoch: 10/20... Training Step: 4250... Training loss: 0.9825... 0.1289 sec/batch\n", "Epoch: 10/20... Training Step: 4251... Training loss: 1.2376... 0.1262 sec/batch\n", "Epoch: 10/20... Training Step: 4252... Training loss: 0.9625... 0.1315 sec/batch\n", "Epoch: 10/20... Training Step: 4253... Training loss: 1.0855... 0.1282 sec/batch\n", "Epoch: 10/20... Training Step: 4254... Training loss: 1.1098... 0.1215 sec/batch\n", "Epoch: 10/20... Training Step: 4255... Training loss: 1.1574... 0.1167 sec/batch\n", "Epoch: 10/20... Training Step: 4256... Training loss: 1.0148... 0.1257 sec/batch\n", "Epoch: 10/20... Training Step: 4257... Training loss: 1.2213... 0.1250 sec/batch\n", "Epoch: 10/20... Training Step: 4258... Training loss: 1.1717... 0.1141 sec/batch\n", "Epoch: 10/20... Training Step: 4259... Training loss: 0.9735... 0.1138 sec/batch\n", "Epoch: 10/20... Training Step: 4260... Training loss: 1.3001... 0.1151 sec/batch\n", "Epoch: 10/20... Training Step: 4261... Training loss: 1.0866... 0.1203 sec/batch\n", "Epoch: 10/20... Training Step: 4262... 
Training loss: 1.2688... 0.1206 sec/batch\n", "Epoch: 10/20... Training Step: 4263... Training loss: 1.0210... 0.1297 sec/batch\n", "Epoch: 10/20... Training Step: 4264... Training loss: 1.3269... 0.1274 sec/batch\n", "Epoch: 10/20... Training Step: 4265... Training loss: 1.3788... 0.1263 sec/batch\n", "Epoch: 10/20... Training Step: 4266... Training loss: 1.1015... 0.1268 sec/batch\n", "Epoch: 10/20... Training Step: 4267... Training loss: 1.1486... 0.1246 sec/batch\n", "Epoch: 10/20... Training Step: 4268... Training loss: 1.2981... 0.1246 sec/batch\n", "Epoch: 10/20... Training Step: 4269... Training loss: 0.9786... 0.1242 sec/batch\n", "Epoch: 10/20... Training Step: 4270... Training loss: 1.3597... 0.1296 sec/batch\n", "Epoch: 10/20... Training Step: 4271... Training loss: 1.1045... 0.1284 sec/batch\n", "Epoch: 10/20... Training Step: 4272... Training loss: 1.1656... 0.1158 sec/batch\n", "Epoch: 10/20... Training Step: 4273... Training loss: 1.3334... 0.1295 sec/batch\n", "Epoch: 10/20... Training Step: 4274... Training loss: 1.1610... 0.1208 sec/batch\n", "Epoch: 10/20... Training Step: 4275... Training loss: 1.2580... 0.1288 sec/batch\n", "Epoch: 10/20... Training Step: 4276... Training loss: 1.1842... 0.1222 sec/batch\n", "Epoch: 10/20... Training Step: 4277... Training loss: 1.1798... 0.1227 sec/batch\n", "Epoch: 10/20... Training Step: 4278... Training loss: 1.3106... 0.1227 sec/batch\n", "Epoch: 10/20... Training Step: 4279... Training loss: 1.2596... 0.1286 sec/batch\n", "Epoch: 10/20... Training Step: 4280... Training loss: 1.0992... 0.1283 sec/batch\n", "Epoch: 10/20... Training Step: 4281... Training loss: 1.2577... 0.1271 sec/batch\n", "Epoch: 10/20... Training Step: 4282... Training loss: 1.2658... 0.1233 sec/batch\n", "Epoch: 10/20... Training Step: 4283... Training loss: 1.2340... 0.1272 sec/batch\n", "Epoch: 10/20... Training Step: 4284... Training loss: 1.1958... 0.1293 sec/batch\n", "Epoch: 10/20... Training Step: 4285... 
Training loss: 1.3253... 0.1268 sec/batch\n", "Epoch: 10/20... Training Step: 4286... Training loss: 1.1065... 0.1288 sec/batch\n", "Epoch: 10/20... Training Step: 4287... Training loss: 1.1880... 0.1249 sec/batch\n", "Epoch: 10/20... Training Step: 4288... Training loss: 1.0821... 0.1213 sec/batch\n", "Epoch: 10/20... Training Step: 4289... Training loss: 1.1503... 0.1309 sec/batch\n", "Epoch: 10/20... Training Step: 4290... Training loss: 1.2567... 0.1269 sec/batch\n", "Epoch: 10/20... Training Step: 4291... Training loss: 1.1389... 0.1217 sec/batch\n", "Epoch: 10/20... Training Step: 4292... Training loss: 1.1063... 0.1250 sec/batch\n", "Epoch: 10/20... Training Step: 4293... Training loss: 1.2297... 0.1275 sec/batch\n", "Epoch: 10/20... Training Step: 4294... Training loss: 1.2314... 0.1230 sec/batch\n", "Epoch: 10/20... Training Step: 4295... Training loss: 1.1724... 0.1316 sec/batch\n", "Epoch: 10/20... Training Step: 4296... Training loss: 1.0651... 0.1264 sec/batch\n", "Epoch: 10/20... Training Step: 4297... Training loss: 1.2658... 0.1291 sec/batch\n", "Epoch: 10/20... Training Step: 4298... Training loss: 1.1955... 0.1276 sec/batch\n", "Epoch: 10/20... Training Step: 4299... Training loss: 1.2097... 0.1231 sec/batch\n", "Epoch: 10/20... Training Step: 4300... Training loss: 1.2730... 0.1253 sec/batch\n", "Epoch: 10/20... Training Step: 4301... Training loss: 1.1441... 0.1301 sec/batch\n", "Epoch: 10/20... Training Step: 4302... Training loss: 1.1319... 0.1239 sec/batch\n", "Epoch: 10/20... Training Step: 4303... Training loss: 1.1309... 0.1341 sec/batch\n", "Epoch: 10/20... Training Step: 4304... Training loss: 1.2407... 0.1225 sec/batch\n", "Epoch: 10/20... Training Step: 4305... Training loss: 1.2400... 0.1304 sec/batch\n", "Epoch: 10/20... Training Step: 4306... Training loss: 1.2216... 0.1274 sec/batch\n", "Epoch: 10/20... Training Step: 4307... Training loss: 1.3206... 0.1330 sec/batch\n", "Epoch: 10/20... Training Step: 4308... 
Training loss: 1.0707... 0.1217 sec/batch\n", "Epoch: 10/20... Training Step: 4309... Training loss: 1.1349... 0.1300 sec/batch\n", "Epoch: 10/20... Training Step: 4310... Training loss: 1.3210... 0.1250 sec/batch\n", "Epoch: 10/20... Training Step: 4311... Training loss: 1.0921... 0.1145 sec/batch\n", "Epoch: 10/20... Training Step: 4312... Training loss: 0.8720... 0.1180 sec/batch\n", "Epoch: 10/20... Training Step: 4313... Training loss: 0.9262... 0.1242 sec/batch\n", "Epoch: 10/20... Training Step: 4314... Training loss: 1.1777... 0.1267 sec/batch\n", "Epoch: 10/20... Training Step: 4315... Training loss: 0.9608... 0.1284 sec/batch\n", "Epoch: 10/20... Training Step: 4316... Training loss: 1.0851... 0.1204 sec/batch\n", "Epoch: 10/20... Training Step: 4317... Training loss: 0.9827... 0.1241 sec/batch\n", "Epoch: 10/20... Training Step: 4318... Training loss: 1.0255... 0.1271 sec/batch\n", "Epoch: 10/20... Training Step: 4319... Training loss: 1.0028... 0.1302 sec/batch\n", "Epoch: 10/20... Training Step: 4320... Training loss: 1.0996... 0.1272 sec/batch\n", "Epoch: 10/20... Training Step: 4321... Training loss: 1.2163... 0.1225 sec/batch\n", "Epoch: 10/20... Training Step: 4322... Training loss: 1.1064... 0.1301 sec/batch\n", "Epoch: 10/20... Training Step: 4323... Training loss: 1.2679... 0.1273 sec/batch\n", "Epoch: 10/20... Training Step: 4324... Training loss: 1.0351... 0.1187 sec/batch\n", "Epoch: 10/20... Training Step: 4325... Training loss: 1.0708... 0.1242 sec/batch\n", "Epoch: 10/20... Training Step: 4326... Training loss: 1.4002... 0.1195 sec/batch\n", "Epoch: 10/20... Training Step: 4327... Training loss: 1.2838... 0.1214 sec/batch\n", "Epoch: 10/20... Training Step: 4328... Training loss: 1.2432... 0.1172 sec/batch\n", "Epoch: 10/20... Training Step: 4329... Training loss: 1.3235... 0.1148 sec/batch\n", "Epoch: 10/20... Training Step: 4330... Training loss: 1.1642... 0.1125 sec/batch\n", "Epoch: 10/20... Training Step: 4331... 
Training loss: 1.0724... 0.1151 sec/batch\n", "Epoch: 10/20... Training Step: 4332... Training loss: 1.1600... 0.1123 sec/batch\n", "Epoch: 10/20... Training Step: 4333... Training loss: 1.1040... 0.1124 sec/batch\n", "Epoch: 10/20... Training Step: 4334... Training loss: 1.0896... 0.1098 sec/batch\n", "Epoch: 10/20... Training Step: 4335... Training loss: 0.9887... 0.1123 sec/batch\n", "Epoch: 10/20... Training Step: 4336... Training loss: 1.1405... 0.1194 sec/batch\n", "Epoch: 10/20... Training Step: 4337... Training loss: 1.2233... 0.1215 sec/batch\n", "Epoch: 10/20... Training Step: 4338... Training loss: 1.0593... 0.1391 sec/batch\n", "Epoch: 10/20... Training Step: 4339... Training loss: 1.3096... 0.1254 sec/batch\n", "Epoch: 10/20... Training Step: 4340... Training loss: 1.0341... 0.1259 sec/batch\n", "Epoch: 10/20... Training Step: 4341... Training loss: 1.1713... 0.1252 sec/batch\n", "Epoch: 10/20... Training Step: 4342... Training loss: 1.0758... 0.1237 sec/batch\n", "Epoch: 10/20... Training Step: 4343... Training loss: 0.9925... 0.1243 sec/batch\n", "Epoch: 10/20... Training Step: 4344... Training loss: 1.2749... 0.1308 sec/batch\n", "Epoch: 10/20... Training Step: 4345... Training loss: 1.1590... 0.1251 sec/batch\n", "Epoch: 10/20... Training Step: 4346... Training loss: 1.2234... 0.1244 sec/batch\n", "Epoch: 10/20... Training Step: 4347... Training loss: 1.1414... 0.1237 sec/batch\n", "Epoch: 10/20... Training Step: 4348... Training loss: 1.1851... 0.1341 sec/batch\n", "Epoch: 10/20... Training Step: 4349... Training loss: 1.0287... 0.1261 sec/batch\n", "Epoch: 10/20... Training Step: 4350... Training loss: 1.0734... 0.1205 sec/batch\n", "Epoch: 10/20... Training Step: 4351... Training loss: 1.2144... 0.1226 sec/batch\n", "Epoch: 10/20... Training Step: 4352... Training loss: 1.0433... 0.1244 sec/batch\n", "Epoch: 10/20... Training Step: 4353... Training loss: 1.0484... 0.1323 sec/batch\n", "Epoch: 10/20... Training Step: 4354... 
Training loss: 1.2620... 0.1285 sec/batch\n", "Epoch: 10/20... Training Step: 4355... Training loss: 0.9245... 0.1266 sec/batch\n", "Epoch: 10/20... Training Step: 4356... Training loss: 1.1664... 0.1232 sec/batch\n", "Epoch: 10/20... Training Step: 4357... Training loss: 0.9381... 0.1257 sec/batch\n", "Epoch: 10/20... Training Step: 4358... Training loss: 1.3875... 0.1250 sec/batch\n", "Epoch: 10/20... Training Step: 4359... Training loss: 1.1439... 0.1240 sec/batch\n", "Epoch: 10/20... Training Step: 4360... Training loss: 1.1823... 0.1252 sec/batch\n", "Epoch: 10/20... Training Step: 4361... Training loss: 1.3091... 0.1271 sec/batch\n", "Epoch: 10/20... Training Step: 4362... Training loss: 1.1521... 0.1258 sec/batch\n", "Epoch: 10/20... Training Step: 4363... Training loss: 1.2272... 0.1230 sec/batch\n", "Epoch: 10/20... Training Step: 4364... Training loss: 1.0251... 0.1272 sec/batch\n", "Epoch: 10/20... Training Step: 4365... Training loss: 1.0933... 0.1143 sec/batch\n", "Epoch: 10/20... Training Step: 4366... Training loss: 1.1254... 0.1266 sec/batch\n", "Epoch: 10/20... Training Step: 4367... Training loss: 1.0964... 0.1263 sec/batch\n", "Epoch: 10/20... Training Step: 4368... Training loss: 1.2000... 0.1222 sec/batch\n", "Epoch: 10/20... Training Step: 4369... Training loss: 1.1610... 0.1242 sec/batch\n", "Epoch: 10/20... Training Step: 4370... Training loss: 1.0537... 0.1199 sec/batch\n", "Epoch: 10/20... Training Step: 4371... Training loss: 1.1871... 0.1140 sec/batch\n", "Epoch: 10/20... Training Step: 4372... Training loss: 1.1271... 0.1151 sec/batch\n", "Epoch: 10/20... Training Step: 4373... Training loss: 1.1223... 0.1179 sec/batch\n", "Epoch: 10/20... Training Step: 4374... Training loss: 1.1731... 0.1112 sec/batch\n", "Epoch: 10/20... Training Step: 4375... Training loss: 0.9271... 0.1143 sec/batch\n", "Epoch: 10/20... Training Step: 4376... Training loss: 1.2973... 0.1319 sec/batch\n", "Epoch: 10/20... Training Step: 4377... 
Training loss: 1.1086... 0.1283 sec/batch\n", "Epoch: 10/20... Training Step: 4378... Training loss: 1.1284... 0.1297 sec/batch\n", "Epoch: 10/20... Training Step: 4379... Training loss: 1.0454... 0.1301 sec/batch\n", "Epoch: 10/20... Training Step: 4380... Training loss: 1.3970... 0.1333 sec/batch\n", "Epoch: 10/20... Training Step: 4381... Training loss: 0.9962... 0.1295 sec/batch\n", "Epoch: 10/20... Training Step: 4382... Training loss: 1.1138... 0.1370 sec/batch\n", "Epoch: 10/20... Training Step: 4383... Training loss: 1.0737... 0.1362 sec/batch\n", "Epoch: 10/20... Training Step: 4384... Training loss: 1.1975... 0.1147 sec/batch\n", "Epoch: 10/20... Training Step: 4385... Training loss: 1.2926... 0.1251 sec/batch\n", "Epoch: 10/20... Training Step: 4386... Training loss: 0.9819... 0.1269 sec/batch\n", "Epoch: 10/20... Training Step: 4387... Training loss: 1.0702... 0.1279 sec/batch\n", "Epoch: 10/20... Training Step: 4388... Training loss: 1.2559... 0.1254 sec/batch\n", "Epoch: 10/20... Training Step: 4389... Training loss: 1.2826... 0.1299 sec/batch\n", "Epoch: 10/20... Training Step: 4390... Training loss: 1.1355... 0.1319 sec/batch\n", "Epoch: 10/20... Training Step: 4391... Training loss: 1.2754... 0.1469 sec/batch\n", "Epoch: 10/20... Training Step: 4392... Training loss: 1.2225... 0.1558 sec/batch\n", "Epoch: 10/20... Training Step: 4393... Training loss: 1.1693... 0.1311 sec/batch\n", "Epoch: 10/20... Training Step: 4394... Training loss: 1.1344... 0.1245 sec/batch\n", "Epoch: 10/20... Training Step: 4395... Training loss: 1.1934... 0.1165 sec/batch\n", "Epoch: 10/20... Training Step: 4396... Training loss: 1.1168... 0.1199 sec/batch\n", "Epoch: 10/20... Training Step: 4397... Training loss: 1.0950... 0.1218 sec/batch\n", "Epoch: 10/20... Training Step: 4398... Training loss: 1.2786... 0.1203 sec/batch\n", "Epoch: 10/20... Training Step: 4399... Training loss: 1.2243... 0.1139 sec/batch\n", "Epoch: 10/20... Training Step: 4400... 
Training loss: 1.3285... 0.1273 sec/batch\n", "Epoch: 10/20... Training Step: 4401... Training loss: 1.2273... 0.1207 sec/batch\n", "Epoch: 10/20... Training Step: 4402... Training loss: 1.2156... 0.1163 sec/batch\n", "Epoch: 10/20... Training Step: 4403... Training loss: 1.3044... 0.1295 sec/batch\n", "Epoch: 10/20... Training Step: 4404... Training loss: 1.1220... 0.1304 sec/batch\n", "Epoch: 10/20... Training Step: 4405... Training loss: 1.1665... 0.1266 sec/batch\n", "Epoch: 10/20... Training Step: 4406... Training loss: 1.1443... 0.1259 sec/batch\n", "Epoch: 10/20... Training Step: 4407... Training loss: 1.1990... 0.1161 sec/batch\n", "Epoch: 10/20... Training Step: 4408... Training loss: 1.1773... 0.1143 sec/batch\n", "Epoch: 10/20... Training Step: 4409... Training loss: 1.5552... 0.1288 sec/batch\n", "Epoch: 10/20... Training Step: 4410... Training loss: 1.1643... 0.1225 sec/batch\n", "Epoch: 10/20... Training Step: 4411... Training loss: 1.2691... 0.1212 sec/batch\n", "Epoch: 10/20... Training Step: 4412... Training loss: 1.1812... 0.1230 sec/batch\n", "Epoch: 10/20... Training Step: 4413... Training loss: 1.2593... 0.1209 sec/batch\n", "Epoch: 10/20... Training Step: 4414... Training loss: 1.0751... 0.1175 sec/batch\n", "Epoch: 10/20... Training Step: 4415... Training loss: 1.2163... 0.1215 sec/batch\n", "Epoch: 10/20... Training Step: 4416... Training loss: 1.3049... 0.1153 sec/batch\n", "Epoch: 10/20... Training Step: 4417... Training loss: 1.1065... 0.1221 sec/batch\n", "Epoch: 10/20... Training Step: 4418... Training loss: 1.0960... 0.1295 sec/batch\n", "Epoch: 10/20... Training Step: 4419... Training loss: 1.1576... 0.1255 sec/batch\n", "Epoch: 10/20... Training Step: 4420... Training loss: 1.1537... 0.1231 sec/batch\n", "Epoch: 10/20... Training Step: 4421... Training loss: 1.1535... 0.1284 sec/batch\n", "Epoch: 10/20... Training Step: 4422... Training loss: 1.0833... 0.1234 sec/batch\n", "Epoch: 10/20... Training Step: 4423... 
Training loss: 1.1872... 0.1312 sec/batch\n", "Epoch: 10/20... Training Step: 4424... Training loss: 1.3153... 0.1357 sec/batch\n", "Epoch: 10/20... Training Step: 4425... Training loss: 1.1688... 0.1372 sec/batch\n", "Epoch: 10/20... Training Step: 4426... Training loss: 1.1577... 0.1385 sec/batch\n", "Epoch: 10/20... Training Step: 4427... Training loss: 1.2140... 0.1339 sec/batch\n", "Epoch: 10/20... Training Step: 4428... Training loss: 1.2322... 0.1403 sec/batch\n", "Epoch: 10/20... Training Step: 4429... Training loss: 1.0904... 0.1304 sec/batch\n", "Epoch: 10/20... Training Step: 4430... Training loss: 1.1745... 0.1270 sec/batch\n", "Epoch: 10/20... Training Step: 4431... Training loss: 1.1599... 0.1208 sec/batch\n", "Epoch: 10/20... Training Step: 4432... Training loss: 1.0859... 0.1125 sec/batch\n", "Epoch: 10/20... Training Step: 4433... Training loss: 1.2790... 0.1152 sec/batch\n", "Epoch: 10/20... Training Step: 4434... Training loss: 1.0800... 0.1207 sec/batch\n", "Epoch: 10/20... Training Step: 4435... Training loss: 1.1067... 0.1300 sec/batch\n", "Epoch: 10/20... Training Step: 4436... Training loss: 1.1353... 0.1255 sec/batch\n", "Epoch: 10/20... Training Step: 4437... Training loss: 1.1365... 0.1274 sec/batch\n", "Epoch: 10/20... Training Step: 4438... Training loss: 1.1661... 0.1156 sec/batch\n", "Epoch: 10/20... Training Step: 4439... Training loss: 1.1877... 0.1297 sec/batch\n", "Epoch: 10/20... Training Step: 4440... Training loss: 1.1833... 0.1274 sec/batch\n", "Epoch: 10/20... Training Step: 4441... Training loss: 1.2668... 0.1241 sec/batch\n", "Epoch: 10/20... Training Step: 4442... Training loss: 1.1587... 0.1218 sec/batch\n", "Epoch: 10/20... Training Step: 4443... Training loss: 1.2971... 0.1245 sec/batch\n", "Epoch: 10/20... Training Step: 4444... Training loss: 1.3498... 0.1242 sec/batch\n", "Epoch: 10/20... Training Step: 4445... Training loss: 1.2474... 0.1250 sec/batch\n", "Epoch: 10/20... Training Step: 4446... 
Training loss: 1.3338... 0.1268 sec/batch\n", "Epoch: 10/20... Training Step: 4447... Training loss: 1.2505... 0.1300 sec/batch\n", "Epoch: 10/20... Training Step: 4448... Training loss: 1.3320... 0.1191 sec/batch\n", "Epoch: 10/20... Training Step: 4449... Training loss: 1.3439... 0.1233 sec/batch\n", "Epoch: 10/20... Training Step: 4450... Training loss: 1.2009... 0.1234 sec/batch\n", "Epoch: 10/20... Training Step: 4451... Training loss: 1.2329... 0.1283 sec/batch\n", "Epoch: 10/20... Training Step: 4452... Training loss: 1.1888... 0.1180 sec/batch\n", "Epoch: 10/20... Training Step: 4453... Training loss: 1.1294... 0.1290 sec/batch\n", "Epoch: 10/20... Training Step: 4454... Training loss: 1.4821... 0.1228 sec/batch\n", "Epoch: 10/20... Training Step: 4455... Training loss: 1.1302... 0.1146 sec/batch\n", "Epoch: 10/20... Training Step: 4456... Training loss: 1.1801... 0.1197 sec/batch\n", "Epoch: 10/20... Training Step: 4457... Training loss: 1.1166... 0.1253 sec/batch\n", "Epoch: 10/20... Training Step: 4458... Training loss: 1.1651... 0.1258 sec/batch\n", "Epoch: 10/20... Training Step: 4459... Training loss: 1.0581... 0.1248 sec/batch\n", "Epoch: 10/20... Training Step: 4460... Training loss: 1.1582... 0.1231 sec/batch\n", "Epoch: 10/20... Training Step: 4461... Training loss: 0.9890... 0.1224 sec/batch\n", "Epoch: 10/20... Training Step: 4462... Training loss: 1.1723... 0.1177 sec/batch\n", "Epoch: 10/20... Training Step: 4463... Training loss: 1.1728... 0.1238 sec/batch\n", "Epoch: 10/20... Training Step: 4464... Training loss: 1.3096... 0.1273 sec/batch\n", "Epoch: 10/20... Training Step: 4465... Training loss: 1.2258... 0.1166 sec/batch\n", "Epoch: 10/20... Training Step: 4466... Training loss: 1.2037... 0.1208 sec/batch\n", "Epoch: 10/20... Training Step: 4467... Training loss: 1.1569... 0.1206 sec/batch\n", "Epoch: 10/20... Training Step: 4468... Training loss: 1.1468... 0.1305 sec/batch\n", "Epoch: 10/20... Training Step: 4469... 
Training loss: 1.0369... 0.1243 sec/batch\n", "Epoch: 10/20... Training Step: 4470... Training loss: 1.1994... 0.1239 sec/batch\n", "Epoch: 10/20... Training Step: 4471... Training loss: 1.2354... 0.1197 sec/batch\n", "Epoch: 10/20... Training Step: 4472... Training loss: 1.5016... 0.1252 sec/batch\n", "Epoch: 10/20... Training Step: 4473... Training loss: 1.0471... 0.1172 sec/batch\n", "Epoch: 10/20... Training Step: 4474... Training loss: 1.0868... 0.1239 sec/batch\n", "Epoch: 10/20... Training Step: 4475... Training loss: 1.2115... 0.1340 sec/batch\n", "Epoch: 10/20... Training Step: 4476... Training loss: 1.1275... 0.1198 sec/batch\n", "Epoch: 10/20... Training Step: 4477... Training loss: 1.1468... 0.1197 sec/batch\n", "Epoch: 10/20... Training Step: 4478... Training loss: 1.0887... 0.1194 sec/batch\n", "Epoch: 10/20... Training Step: 4479... Training loss: 0.9418... 0.1179 sec/batch\n", "Epoch: 10/20... Training Step: 4480... Training loss: 1.2917... 0.1269 sec/batch\n", "Epoch: 10/20... Training Step: 4481... Training loss: 0.9377... 0.1306 sec/batch\n", "Epoch: 10/20... Training Step: 4482... Training loss: 1.2369... 0.1231 sec/batch\n", "Epoch: 10/20... Training Step: 4483... Training loss: 1.2461... 0.1225 sec/batch\n", "Epoch: 10/20... Training Step: 4484... Training loss: 1.4924... 0.1250 sec/batch\n", "Epoch: 10/20... Training Step: 4485... Training loss: 1.2933... 0.1255 sec/batch\n", "Epoch: 10/20... Training Step: 4486... Training loss: 1.2726... 0.1235 sec/batch\n", "Epoch: 10/20... Training Step: 4487... Training loss: 1.1304... 0.1151 sec/batch\n", "Epoch: 10/20... Training Step: 4488... Training loss: 1.1263... 0.1171 sec/batch\n", "Epoch: 10/20... Training Step: 4489... Training loss: 1.0737... 0.1173 sec/batch\n", "Epoch: 10/20... Training Step: 4490... Training loss: 1.1072... 0.1154 sec/batch\n", "Epoch: 10/20... Training Step: 4491... Training loss: 0.9944... 0.1206 sec/batch\n", "Epoch: 10/20... Training Step: 4492... 
Training loss: 1.1145... 0.1198 sec/batch\n", "Epoch: 10/20... Training Step: 4493... Training loss: 0.9994... 0.1198 sec/batch\n", "Epoch: 10/20... Training Step: 4494... Training loss: 1.0649... 0.1147 sec/batch\n", "Epoch: 10/20... Training Step: 4495... Training loss: 1.0713... 0.1200 sec/batch\n", "Epoch: 10/20... Training Step: 4496... Training loss: 1.0945... 0.1192 sec/batch\n", "Epoch: 10/20... Training Step: 4497... Training loss: 1.0212... 0.1200 sec/batch\n", "Epoch: 10/20... Training Step: 4498... Training loss: 1.2230... 0.1197 sec/batch\n", "Epoch: 10/20... Training Step: 4499... Training loss: 1.1378... 0.1165 sec/batch\n", "Epoch: 10/20... Training Step: 4500... Training loss: 0.9861... 0.1162 sec/batch\n", "Epoch: 10/20... Training Step: 4501... Training loss: 1.0085... 0.1190 sec/batch\n", "Epoch: 10/20... Training Step: 4502... Training loss: 0.9308... 0.1192 sec/batch\n", "Epoch: 10/20... Training Step: 4503... Training loss: 1.1057... 0.1144 sec/batch\n", "Epoch: 10/20... Training Step: 4504... Training loss: 1.0586... 0.1177 sec/batch\n", "Epoch: 10/20... Training Step: 4505... Training loss: 1.1945... 0.1206 sec/batch\n", "Epoch: 10/20... Training Step: 4506... Training loss: 1.0820... 0.1199 sec/batch\n", "Epoch: 10/20... Training Step: 4507... Training loss: 1.1202... 0.1169 sec/batch\n", "Epoch: 10/20... Training Step: 4508... Training loss: 1.1445... 0.1169 sec/batch\n", "Epoch: 10/20... Training Step: 4509... Training loss: 1.0296... 0.1202 sec/batch\n", "Epoch: 10/20... Training Step: 4510... Training loss: 1.0141... 0.1168 sec/batch\n", "Epoch: 10/20... Training Step: 4511... Training loss: 1.1108... 0.1178 sec/batch\n", "Epoch: 10/20... Training Step: 4512... Training loss: 1.0957... 0.1168 sec/batch\n", "Epoch: 10/20... Training Step: 4513... Training loss: 1.0818... 0.1172 sec/batch\n", "Epoch: 10/20... Training Step: 4514... Training loss: 1.0928... 0.1179 sec/batch\n", "Epoch: 10/20... Training Step: 4515... 
Training loss: 1.2028... 0.1150 sec/batch\n", "Epoch: 10/20... Training Step: 4516... Training loss: 1.0266... 0.1197 sec/batch\n", "Epoch: 10/20... Training Step: 4517... Training loss: 1.0507... 0.1184 sec/batch\n", "Epoch: 10/20... Training Step: 4518... Training loss: 1.1874... 0.1169 sec/batch\n", "Epoch: 10/20... Training Step: 4519... Training loss: 0.9585... 0.1194 sec/batch\n", "Epoch: 10/20... Training Step: 4520... Training loss: 1.1214... 0.1168 sec/batch\n", "Epoch: 10/20... Training Step: 4521... Training loss: 0.9954... 0.1201 sec/batch\n", "Epoch: 10/20... Training Step: 4522... Training loss: 0.9271... 0.1194 sec/batch\n", "Epoch: 10/20... Training Step: 4523... Training loss: 1.0071... 0.1180 sec/batch\n", "Epoch: 10/20... Training Step: 4524... Training loss: 1.2725... 0.1180 sec/batch\n", "Epoch: 10/20... Training Step: 4525... Training loss: 1.0510... 0.1200 sec/batch\n", "Epoch: 10/20... Training Step: 4526... Training loss: 1.0897... 0.1184 sec/batch\n", "Epoch: 10/20... Training Step: 4527... Training loss: 1.1484... 0.1235 sec/batch\n", "Epoch: 10/20... Training Step: 4528... Training loss: 0.9878... 0.1188 sec/batch\n", "Epoch: 10/20... Training Step: 4529... Training loss: 0.9875... 0.1239 sec/batch\n", "Epoch: 10/20... Training Step: 4530... Training loss: 0.8642... 0.1252 sec/batch\n", "Epoch: 10/20... Training Step: 4531... Training loss: 1.1479... 0.1246 sec/batch\n", "Epoch: 10/20... Training Step: 4532... Training loss: 1.0864... 0.1302 sec/batch\n", "Epoch: 10/20... Training Step: 4533... Training loss: 1.0855... 0.1257 sec/batch\n", "Epoch: 10/20... Training Step: 4534... Training loss: 1.0987... 0.1288 sec/batch\n", "Epoch: 10/20... Training Step: 4535... Training loss: 1.2618... 0.1253 sec/batch\n", "Epoch: 10/20... Training Step: 4536... Training loss: 0.8980... 0.1277 sec/batch\n", "Epoch: 10/20... Training Step: 4537... Training loss: 1.3095... 0.1407 sec/batch\n", "Epoch: 10/20... Training Step: 4538... 
Training loss: 1.2216... 0.1280 sec/batch\n", "Epoch: 10/20... Training Step: 4539... Training loss: 0.9911... 0.1265 sec/batch\n", "Epoch: 10/20... Training Step: 4540... Training loss: 1.1801... 0.1215 sec/batch\n", "Epoch: 10/20... Training Step: 4541... Training loss: 1.0355... 0.1281 sec/batch\n", "Epoch: 10/20... Training Step: 4542... Training loss: 1.1698... 0.1282 sec/batch\n", "Epoch: 10/20... Training Step: 4543... Training loss: 1.1100... 0.1263 sec/batch\n", "Epoch: 10/20... Training Step: 4544... Training loss: 1.2247... 0.1322 sec/batch\n", "Epoch: 10/20... Training Step: 4545... Training loss: 1.0733... 0.1172 sec/batch\n", "Epoch: 10/20... Training Step: 4546... Training loss: 1.1531... 0.1336 sec/batch\n", "Epoch: 10/20... Training Step: 4547... Training loss: 1.0669... 0.1252 sec/batch\n", "Epoch: 10/20... Training Step: 4548... Training loss: 1.1865... 0.1177 sec/batch\n", "Epoch: 10/20... Training Step: 4549... Training loss: 1.0054... 0.1170 sec/batch\n", "Epoch: 10/20... Training Step: 4550... Training loss: 1.2985... 0.1311 sec/batch\n", "Epoch: 10/20... Training Step: 4551... Training loss: 1.1856... 0.1204 sec/batch\n", "Epoch: 10/20... Training Step: 4552... Training loss: 1.2340... 0.1270 sec/batch\n", "Epoch: 10/20... Training Step: 4553... Training loss: 1.2563... 0.1210 sec/batch\n", "Epoch: 10/20... Training Step: 4554... Training loss: 1.2695... 0.1263 sec/batch\n", "Epoch: 10/20... Training Step: 4555... Training loss: 1.0730... 0.1205 sec/batch\n", "Epoch: 10/20... Training Step: 4556... Training loss: 1.1440... 0.1252 sec/batch\n", "Epoch: 10/20... Training Step: 4557... Training loss: 0.9799... 0.1297 sec/batch\n", "Epoch: 10/20... Training Step: 4558... Training loss: 1.0193... 0.1175 sec/batch\n", "Epoch: 10/20... Training Step: 4559... Training loss: 1.1328... 0.1203 sec/batch\n", "Epoch: 10/20... Training Step: 4560... Training loss: 1.1403... 0.1213 sec/batch\n", "Epoch: 10/20... Training Step: 4561... 
Training loss: 1.0056... 0.1238 sec/batch\n", "Epoch: 10/20... Training Step: 4562... Training loss: 1.2066... 0.1288 sec/batch\n", "Epoch: 10/20... Training Step: 4563... Training loss: 1.2016... 0.1253 sec/batch\n", "Epoch: 10/20... Training Step: 4564... Training loss: 0.9988... 0.1284 sec/batch\n", "Epoch: 10/20... Training Step: 4565... Training loss: 0.9943... 0.1246 sec/batch\n", "Epoch: 10/20... Training Step: 4566... Training loss: 1.0810... 0.1304 sec/batch\n", "Epoch: 10/20... Training Step: 4567... Training loss: 0.9547... 0.1391 sec/batch\n", "Epoch: 10/20... Training Step: 4568... Training loss: 1.0027... 0.1273 sec/batch\n", "Epoch: 10/20... Training Step: 4569... Training loss: 1.0545... 0.1247 sec/batch\n", "Epoch: 10/20... Training Step: 4570... Training loss: 1.1210... 0.1379 sec/batch\n", "Epoch: 10/20... Training Step: 4571... Training loss: 1.0725... 0.1288 sec/batch\n", "Epoch: 10/20... Training Step: 4572... Training loss: 1.2257... 0.1273 sec/batch\n", "Epoch: 10/20... Training Step: 4573... Training loss: 1.0468... 0.1264 sec/batch\n", "Epoch: 10/20... Training Step: 4574... Training loss: 1.1249... 0.1246 sec/batch\n", "Epoch: 10/20... Training Step: 4575... Training loss: 1.0677... 0.1221 sec/batch\n", "Epoch: 10/20... Training Step: 4576... Training loss: 1.0749... 0.1219 sec/batch\n", "Epoch: 10/20... Training Step: 4577... Training loss: 1.0591... 0.1293 sec/batch\n", "Epoch: 10/20... Training Step: 4578... Training loss: 0.9274... 0.1282 sec/batch\n", "Epoch: 10/20... Training Step: 4579... Training loss: 1.0867... 0.1275 sec/batch\n", "Epoch: 10/20... Training Step: 4580... Training loss: 1.2035... 0.1283 sec/batch\n", "Epoch: 10/20... Training Step: 4581... Training loss: 1.1601... 0.1315 sec/batch\n", "Epoch: 10/20... Training Step: 4582... Training loss: 1.2037... 0.1343 sec/batch\n", "Epoch: 10/20... Training Step: 4583... Training loss: 1.0257... 0.1341 sec/batch\n", "Epoch: 10/20... Training Step: 4584... 
Training loss: 1.4482... 0.1222 sec/batch\n", "Epoch: 10/20... Training Step: 4585... Training loss: 1.0753... 0.1174 sec/batch\n", "Epoch: 10/20... Training Step: 4586... Training loss: 1.0926... 0.1246 sec/batch\n", "Epoch: 10/20... Training Step: 4587... Training loss: 1.0699... 0.1184 sec/batch\n", "Epoch: 10/20... Training Step: 4588... Training loss: 1.1414... 0.1176 sec/batch\n", "Epoch: 10/20... Training Step: 4589... Training loss: 1.1855... 0.1237 sec/batch\n", "Epoch: 10/20... Training Step: 4590... Training loss: 1.0433... 0.1248 sec/batch\n", "Epoch: 10/20... Training Step: 4591... Training loss: 1.2090... 0.1231 sec/batch\n", "Epoch: 10/20... Training Step: 4592... Training loss: 1.4427... 0.1291 sec/batch\n", "Epoch: 10/20... Training Step: 4593... Training loss: 1.2945... 0.1301 sec/batch\n", "Epoch: 10/20... Training Step: 4594... Training loss: 0.9396... 0.1180 sec/batch\n", "Epoch: 10/20... Training Step: 4595... Training loss: 1.0788... 0.1161 sec/batch\n", "Epoch: 10/20... Training Step: 4596... Training loss: 0.9580... 0.1257 sec/batch\n", "Epoch: 10/20... Training Step: 4597... Training loss: 1.1633... 0.1194 sec/batch\n", "Epoch: 10/20... Training Step: 4598... Training loss: 1.1444... 0.1165 sec/batch\n", "Epoch: 10/20... Training Step: 4599... Training loss: 1.1302... 0.1147 sec/batch\n", "Epoch: 10/20... Training Step: 4600... Training loss: 1.2508... 0.1204 sec/batch\n", "Epoch: 10/20... Training Step: 4601... Training loss: 1.0822... 0.1165 sec/batch\n", "Epoch: 10/20... Training Step: 4602... Training loss: 1.1038... 0.1170 sec/batch\n", "Epoch: 10/20... Training Step: 4603... Training loss: 1.1726... 0.1276 sec/batch\n", "Epoch: 10/20... Training Step: 4604... Training loss: 1.1037... 0.1211 sec/batch\n", "Epoch: 10/20... Training Step: 4605... Training loss: 1.0177... 0.1192 sec/batch\n", "Epoch: 10/20... Training Step: 4606... Training loss: 1.1115... 0.1289 sec/batch\n", "Epoch: 10/20... Training Step: 4607... 
Training loss: 1.3377... 0.1181 sec/batch\n", "Epoch: 10/20... Training Step: 4608... Training loss: 1.1337... 0.1225 sec/batch\n", "Epoch: 10/20... Training Step: 4609... Training loss: 1.3820... 0.1271 sec/batch\n", "Epoch: 10/20... Training Step: 4610... Training loss: 1.2910... 0.1222 sec/batch\n", "Epoch: 10/20... Training Step: 4611... Training loss: 0.9744... 0.1175 sec/batch\n", "Epoch: 10/20... Training Step: 4612... Training loss: 1.1844... 0.1256 sec/batch\n", "Epoch: 10/20... Training Step: 4613... Training loss: 1.0170... 0.1194 sec/batch\n", "Epoch: 10/20... Training Step: 4614... Training loss: 1.2857... 0.1103 sec/batch\n", "Epoch: 10/20... Training Step: 4615... Training loss: 1.3345... 0.1289 sec/batch\n", "Epoch: 10/20... Training Step: 4616... Training loss: 1.2986... 0.1235 sec/batch\n", "Epoch: 10/20... Training Step: 4617... Training loss: 0.9613... 0.1251 sec/batch\n", "Epoch: 10/20... Training Step: 4618... Training loss: 1.1451... 0.1227 sec/batch\n", "Epoch: 10/20... Training Step: 4619... Training loss: 1.2347... 0.1267 sec/batch\n", "Epoch: 10/20... Training Step: 4620... Training loss: 1.1812... 0.1261 sec/batch\n", "Epoch: 10/20... Training Step: 4621... Training loss: 1.2636... 0.1266 sec/batch\n", "Epoch: 10/20... Training Step: 4622... Training loss: 1.1318... 0.1233 sec/batch\n", "Epoch: 10/20... Training Step: 4623... Training loss: 1.0897... 0.1202 sec/batch\n", "Epoch: 10/20... Training Step: 4624... Training loss: 0.9945... 0.1241 sec/batch\n", "Epoch: 10/20... Training Step: 4625... Training loss: 1.2001... 0.1261 sec/batch\n", "Epoch: 10/20... Training Step: 4626... Training loss: 1.1133... 0.1224 sec/batch\n", "Epoch: 10/20... Training Step: 4627... Training loss: 1.0142... 0.1242 sec/batch\n", "Epoch: 10/20... Training Step: 4628... Training loss: 1.2936... 0.1266 sec/batch\n", "Epoch: 10/20... Training Step: 4629... Training loss: 1.0897... 0.1272 sec/batch\n", "Epoch: 10/20... Training Step: 4630... 
Training loss: 1.4363... 0.1220 sec/batch\n", "Epoch: 10/20... Training Step: 4631... Training loss: 1.2970... 0.1225 sec/batch\n", "Epoch: 10/20... Training Step: 4632... Training loss: 0.9405... 0.1284 sec/batch\n", "Epoch: 10/20... Training Step: 4633... Training loss: 1.0258... 0.1305 sec/batch\n", "Epoch: 10/20... Training Step: 4634... Training loss: 0.9095... 0.1160 sec/batch\n", "Epoch: 10/20... Training Step: 4635... Training loss: 0.9603... 0.1237 sec/batch\n", "Epoch: 10/20... Training Step: 4636... Training loss: 1.1425... 0.1233 sec/batch\n", "Epoch: 10/20... Training Step: 4637... Training loss: 1.1764... 0.1144 sec/batch\n", "Epoch: 10/20... Training Step: 4638... Training loss: 1.0053... 0.1136 sec/batch\n", "Epoch: 10/20... Training Step: 4639... Training loss: 1.1530... 0.1142 sec/batch\n", "Epoch: 10/20... Training Step: 4640... Training loss: 1.0521... 0.1248 sec/batch\n", "Epoch: 11/20... Training Step: 4641... Training loss: 1.4921... 0.1404 sec/batch\n", "Epoch: 11/20... Training Step: 4642... Training loss: 1.1083... 0.1331 sec/batch\n", "Epoch: 11/20... Training Step: 4643... Training loss: 1.0618... 0.1265 sec/batch\n", "Epoch: 11/20... Training Step: 4644... Training loss: 1.1195... 0.1216 sec/batch\n", "Epoch: 11/20... Training Step: 4645... Training loss: 1.1696... 0.1255 sec/batch\n", "Epoch: 11/20... Training Step: 4646... Training loss: 1.0381... 0.1247 sec/batch\n", "Epoch: 11/20... Training Step: 4647... Training loss: 1.2149... 0.1256 sec/batch\n", "Epoch: 11/20... Training Step: 4648... Training loss: 1.0056... 0.1261 sec/batch\n", "Epoch: 11/20... Training Step: 4649... Training loss: 1.0171... 0.1266 sec/batch\n", "Epoch: 11/20... Training Step: 4650... Training loss: 1.1183... 0.1316 sec/batch\n", "Epoch: 11/20... Training Step: 4651... Training loss: 1.1784... 0.1261 sec/batch\n", "Epoch: 11/20... Training Step: 4652... Training loss: 0.9166... 0.1324 sec/batch\n", "Epoch: 11/20... Training Step: 4653... 
Training loss: 1.3499... 0.1275 sec/batch\n", "Epoch: 11/20... Training Step: 4654... Training loss: 0.8840... 0.1201 sec/batch\n", "Epoch: 11/20... Training Step: 4655... Training loss: 1.2156... 0.1242 sec/batch\n", "Epoch: 11/20... Training Step: 4656... Training loss: 1.2437... 0.1250 sec/batch\n", "Epoch: 11/20... Training Step: 4657... Training loss: 1.1115... 0.1195 sec/batch\n", "Epoch: 11/20... Training Step: 4658... Training loss: 1.2028... 0.1223 sec/batch\n", "Epoch: 11/20... Training Step: 4659... Training loss: 1.1225... 0.1240 sec/batch\n", "Epoch: 11/20... Training Step: 4660... Training loss: 1.0719... 0.1304 sec/batch\n", "Epoch: 11/20... Training Step: 4661... Training loss: 1.2356... 0.1283 sec/batch\n", "Epoch: 11/20... Training Step: 4662... Training loss: 1.0856... 0.1282 sec/batch\n", "Epoch: 11/20... Training Step: 4663... Training loss: 1.2345... 0.1260 sec/batch\n", "Epoch: 11/20... Training Step: 4664... Training loss: 1.0212... 0.1320 sec/batch\n", "Epoch: 11/20... Training Step: 4665... Training loss: 1.1290... 0.1410 sec/batch\n", "Epoch: 11/20... Training Step: 4666... Training loss: 1.2081... 0.1309 sec/batch\n", "Epoch: 11/20... Training Step: 4667... Training loss: 1.1725... 0.1348 sec/batch\n", "Epoch: 11/20... Training Step: 4668... Training loss: 0.9423... 0.1353 sec/batch\n", "Epoch: 11/20... Training Step: 4669... Training loss: 1.1628... 0.1345 sec/batch\n", "Epoch: 11/20... Training Step: 4670... Training loss: 1.1500... 0.1296 sec/batch\n", "Epoch: 11/20... Training Step: 4671... Training loss: 0.9715... 0.1242 sec/batch\n", "Epoch: 11/20... Training Step: 4672... Training loss: 1.0834... 0.1317 sec/batch\n", "Epoch: 11/20... Training Step: 4673... Training loss: 0.9153... 0.1307 sec/batch\n", "Epoch: 11/20... Training Step: 4674... Training loss: 0.9855... 0.1238 sec/batch\n", "Epoch: 11/20... Training Step: 4675... Training loss: 0.9815... 0.1328 sec/batch\n", "Epoch: 11/20... Training Step: 4676... 
Training loss: 1.0538... 0.1295 sec/batch\n", "Epoch: 11/20... Training Step: 4677... Training loss: 1.1860... 0.1275 sec/batch\n", "Epoch: 11/20... Training Step: 4678... Training loss: 1.0233... 0.1294 sec/batch\n", "Epoch: 11/20... Training Step: 4679... Training loss: 1.0704... 0.1239 sec/batch\n", "Epoch: 11/20... Training Step: 4680... Training loss: 1.3717... 0.1304 sec/batch\n", "Epoch: 11/20... Training Step: 4681... Training loss: 1.1149... 0.1337 sec/batch\n", "Epoch: 11/20... Training Step: 4682... Training loss: 1.0458... 0.1337 sec/batch\n", "Epoch: 11/20... Training Step: 4683... Training loss: 1.2264... 0.1287 sec/batch\n", "Epoch: 11/20... Training Step: 4684... Training loss: 0.8999... 0.1347 sec/batch\n", "Epoch: 11/20... Training Step: 4685... Training loss: 1.0690... 0.1288 sec/batch\n", "Epoch: 11/20... Training Step: 4686... Training loss: 1.0925... 0.1311 sec/batch\n", "Epoch: 11/20... Training Step: 4687... Training loss: 1.1917... 0.1299 sec/batch\n", "Epoch: 11/20... Training Step: 4688... Training loss: 1.1074... 0.1171 sec/batch\n", "Epoch: 11/20... Training Step: 4689... Training loss: 1.1166... 0.1326 sec/batch\n", "Epoch: 11/20... Training Step: 4690... Training loss: 1.0760... 0.1327 sec/batch\n", "Epoch: 11/20... Training Step: 4691... Training loss: 1.1052... 0.1282 sec/batch\n", "Epoch: 11/20... Training Step: 4692... Training loss: 1.2709... 0.1327 sec/batch\n", "Epoch: 11/20... Training Step: 4693... Training loss: 1.1516... 0.1324 sec/batch\n", "Epoch: 11/20... Training Step: 4694... Training loss: 1.0441... 0.1301 sec/batch\n", "Epoch: 11/20... Training Step: 4695... Training loss: 1.0230... 0.1282 sec/batch\n", "Epoch: 11/20... Training Step: 4696... Training loss: 1.0265... 0.1307 sec/batch\n", "Epoch: 11/20... Training Step: 4697... Training loss: 1.0687... 0.1218 sec/batch\n", "Epoch: 11/20... Training Step: 4698... Training loss: 1.2254... 0.1244 sec/batch\n", "Epoch: 11/20... Training Step: 4699... 
Training loss: 0.8824... 0.1268 sec/batch\n", "Epoch: 11/20... Training Step: 4700... Training loss: 1.0476... 0.1315 sec/batch\n", "Epoch: 11/20... Training Step: 4701... Training loss: 1.0092... 0.1349 sec/batch\n", "Epoch: 11/20... Training Step: 4702... Training loss: 1.2437... 0.1364 sec/batch\n", "Epoch: 11/20... Training Step: 4703... Training loss: 0.9482... 0.1277 sec/batch\n", "Epoch: 11/20... Training Step: 4704... Training loss: 1.1660... 0.1343 sec/batch\n", "Epoch: 11/20... Training Step: 4705... Training loss: 0.9995... 0.1366 sec/batch\n", "Epoch: 11/20... Training Step: 4706... Training loss: 1.1502... 0.1349 sec/batch\n", "Epoch: 11/20... Training Step: 4707... Training loss: 1.1054... 0.1373 sec/batch\n", "Epoch: 11/20... Training Step: 4708... Training loss: 1.1832... 0.1276 sec/batch\n", "Epoch: 11/20... Training Step: 4709... Training loss: 0.9893... 0.1375 sec/batch\n", "Epoch: 11/20... Training Step: 4710... Training loss: 1.0998... 0.1383 sec/batch\n", "Epoch: 11/20... Training Step: 4711... Training loss: 1.2514... 0.1275 sec/batch\n", "Epoch: 11/20... Training Step: 4712... Training loss: 0.9839... 0.1307 sec/batch\n", "Epoch: 11/20... Training Step: 4713... Training loss: 1.1467... 0.1353 sec/batch\n", "Epoch: 11/20... Training Step: 4714... Training loss: 1.0221... 0.1285 sec/batch\n", "Epoch: 11/20... Training Step: 4715... Training loss: 1.2358... 0.1355 sec/batch\n", "Epoch: 11/20... Training Step: 4716... Training loss: 1.0757... 0.1316 sec/batch\n", "Epoch: 11/20... Training Step: 4717... Training loss: 1.0302... 0.1318 sec/batch\n", "Epoch: 11/20... Training Step: 4718... Training loss: 1.1453... 0.1320 sec/batch\n", "Epoch: 11/20... Training Step: 4719... Training loss: 1.2225... 0.1351 sec/batch\n", "Epoch: 11/20... Training Step: 4720... Training loss: 0.9376... 0.1336 sec/batch\n", "Epoch: 11/20... Training Step: 4721... Training loss: 1.2626... 0.1297 sec/batch\n", "Epoch: 11/20... Training Step: 4722... 
Training loss: 1.0687... 0.1204 sec/batch\n", "Epoch: 11/20... Training Step: 4723... Training loss: 1.0852... 0.1272 sec/batch\n", "Epoch: 11/20... Training Step: 4724... Training loss: 1.2302... 0.1272 sec/batch\n", "Epoch: 11/20... Training Step: 4725... Training loss: 1.0821... 0.1205 sec/batch\n", "Epoch: 11/20... Training Step: 4726... Training loss: 1.2828... 0.1286 sec/batch\n", "Epoch: 11/20... Training Step: 4727... Training loss: 0.9707... 0.1257 sec/batch\n", "Epoch: 11/20... Training Step: 4728... Training loss: 1.1511... 0.1257 sec/batch\n", "Epoch: 11/20... Training Step: 4729... Training loss: 1.1880... 0.1204 sec/batch\n", "Epoch: 11/20... Training Step: 4730... Training loss: 1.0725... 0.1249 sec/batch\n", "Epoch: 11/20... Training Step: 4731... Training loss: 1.1922... 0.1298 sec/batch\n", "Epoch: 11/20... Training Step: 4732... Training loss: 1.1713... 0.1228 sec/batch\n", "Epoch: 11/20... Training Step: 4733... Training loss: 0.9878... 0.1242 sec/batch\n", "Epoch: 11/20... Training Step: 4734... Training loss: 1.1699... 0.1230 sec/batch\n", "Epoch: 11/20... Training Step: 4735... Training loss: 1.1313... 0.1278 sec/batch\n", "Epoch: 11/20... Training Step: 4736... Training loss: 1.0617... 0.1284 sec/batch\n", "Epoch: 11/20... Training Step: 4737... Training loss: 1.2983... 0.1285 sec/batch\n", "Epoch: 11/20... Training Step: 4738... Training loss: 1.1603... 0.1184 sec/batch\n", "Epoch: 11/20... Training Step: 4739... Training loss: 1.1196... 0.1170 sec/batch\n", "Epoch: 11/20... Training Step: 4740... Training loss: 1.1457... 0.1281 sec/batch\n", "Epoch: 11/20... Training Step: 4741... Training loss: 1.1547... 0.1291 sec/batch\n", "Epoch: 11/20... Training Step: 4742... Training loss: 1.3098... 0.1180 sec/batch\n", "Epoch: 11/20... Training Step: 4743... Training loss: 1.2495... 0.1270 sec/batch\n", "Epoch: 11/20... Training Step: 4744... Training loss: 1.1795... 0.1278 sec/batch\n", "Epoch: 11/20... Training Step: 4745... 
Training loss: 1.2423... 0.1138 sec/batch\n", "Epoch: 11/20... Training Step: 4746... Training loss: 1.2896... 0.1226 sec/batch\n", "Epoch: 11/20... Training Step: 4747... Training loss: 1.0368... 0.1258 sec/batch\n", "Epoch: 11/20... Training Step: 4748... Training loss: 1.2004... 0.1190 sec/batch\n", "Epoch: 11/20... Training Step: 4749... Training loss: 1.1259... 0.1195 sec/batch\n", "Epoch: 11/20... Training Step: 4750... Training loss: 1.0013... 0.1258 sec/batch\n", "Epoch: 11/20... Training Step: 4751... Training loss: 1.1755... 0.1277 sec/batch\n", "Epoch: 11/20... Training Step: 4752... Training loss: 0.9733... 0.1249 sec/batch\n", "Epoch: 11/20... Training Step: 4753... Training loss: 1.2272... 0.1228 sec/batch\n", "Epoch: 11/20... Training Step: 4754... Training loss: 1.3725... 0.1227 sec/batch\n", "Epoch: 11/20... Training Step: 4755... Training loss: 1.1601... 0.1187 sec/batch\n", "Epoch: 11/20... Training Step: 4756... Training loss: 1.1013... 0.1307 sec/batch\n", "Epoch: 11/20... Training Step: 4757... Training loss: 1.2000... 0.1242 sec/batch\n", "Epoch: 11/20... Training Step: 4758... Training loss: 1.2297... 0.1234 sec/batch\n", "Epoch: 11/20... Training Step: 4759... Training loss: 1.1158... 0.1196 sec/batch\n", "Epoch: 11/20... Training Step: 4760... Training loss: 1.0306... 0.1190 sec/batch\n", "Epoch: 11/20... Training Step: 4761... Training loss: 1.1655... 0.1194 sec/batch\n", "Epoch: 11/20... Training Step: 4762... Training loss: 1.2437... 0.1229 sec/batch\n", "Epoch: 11/20... Training Step: 4763... Training loss: 1.0416... 0.1205 sec/batch\n", "Epoch: 11/20... Training Step: 4764... Training loss: 1.1350... 0.1180 sec/batch\n", "Epoch: 11/20... Training Step: 4765... Training loss: 1.2315... 0.1232 sec/batch\n", "Epoch: 11/20... Training Step: 4766... Training loss: 0.9244... 0.1179 sec/batch\n", "Epoch: 11/20... Training Step: 4767... Training loss: 1.0917... 0.1162 sec/batch\n", "Epoch: 11/20... Training Step: 4768... 
Training loss: 1.2004... 0.1220 sec/batch\n", "Epoch: 11/20... Training Step: 4769... Training loss: 1.1902... 0.1283 sec/batch\n", "Epoch: 11/20... Training Step: 4770... Training loss: 1.0264... 0.1217 sec/batch\n", "Epoch: 11/20... Training Step: 4771... Training loss: 1.3077... 0.1200 sec/batch\n", "Epoch: 11/20... Training Step: 4772... Training loss: 1.1000... 0.1209 sec/batch\n", "Epoch: 11/20... Training Step: 4773... Training loss: 1.0438... 0.1192 sec/batch\n", "Epoch: 11/20... Training Step: 4774... Training loss: 1.2187... 0.1253 sec/batch\n", "Epoch: 11/20... Training Step: 4775... Training loss: 0.9529... 0.1157 sec/batch\n", "Epoch: 11/20... Training Step: 4776... Training loss: 0.8896... 0.1170 sec/batch\n", "Epoch: 11/20... Training Step: 4777... Training loss: 0.9323... 0.1278 sec/batch\n", "Epoch: 11/20... Training Step: 4778... Training loss: 1.0310... 0.1322 sec/batch\n", "Epoch: 11/20... Training Step: 4779... Training loss: 1.0382... 0.1279 sec/batch\n", "Epoch: 11/20... Training Step: 4780... Training loss: 1.2112... 0.1289 sec/batch\n", "Epoch: 11/20... Training Step: 4781... Training loss: 0.9467... 0.1230 sec/batch\n", "Epoch: 11/20... Training Step: 4782... Training loss: 1.0605... 0.1251 sec/batch\n", "Epoch: 11/20... Training Step: 4783... Training loss: 0.9958... 0.1199 sec/batch\n", "Epoch: 11/20... Training Step: 4784... Training loss: 1.1139... 0.1181 sec/batch\n", "Epoch: 11/20... Training Step: 4785... Training loss: 1.1536... 0.1255 sec/batch\n", "Epoch: 11/20... Training Step: 4786... Training loss: 1.0271... 0.1270 sec/batch\n", "Epoch: 11/20... Training Step: 4787... Training loss: 1.1275... 0.1238 sec/batch\n", "Epoch: 11/20... Training Step: 4788... Training loss: 1.0454... 0.1307 sec/batch\n", "Epoch: 11/20... Training Step: 4789... Training loss: 1.0988... 0.1279 sec/batch\n", "Epoch: 11/20... Training Step: 4790... Training loss: 1.2477... 0.1334 sec/batch\n", "Epoch: 11/20... Training Step: 4791... 
Training loss: 1.3062... 0.1250 sec/batch\n", "Epoch: 11/20... Training Step: 4792... Training loss: 1.1342... 0.1270 sec/batch\n", "Epoch: 11/20... Training Step: 4793... Training loss: 1.2682... 0.1228 sec/batch\n", "Epoch: 11/20... Training Step: 4794... Training loss: 1.1047... 0.1213 sec/batch\n", "Epoch: 11/20... Training Step: 4795... Training loss: 1.1321... 0.1207 sec/batch\n", "Epoch: 11/20... Training Step: 4796... Training loss: 1.0380... 0.1226 sec/batch\n", "Epoch: 11/20... Training Step: 4797... Training loss: 1.0676... 0.1265 sec/batch\n", "Epoch: 11/20... Training Step: 4798... Training loss: 1.1071... 0.1245 sec/batch\n", "Epoch: 11/20... Training Step: 4799... Training loss: 1.0611... 0.1301 sec/batch\n", "Epoch: 11/20... Training Step: 4800... Training loss: 1.0948... 0.1302 sec/batch\n", "Epoch: 11/20... Training Step: 4801... Training loss: 1.0266... 0.1236 sec/batch\n", "Epoch: 11/20... Training Step: 4802... Training loss: 1.0968... 0.1261 sec/batch\n", "Epoch: 11/20... Training Step: 4803... Training loss: 1.3681... 0.1304 sec/batch\n", "Epoch: 11/20... Training Step: 4804... Training loss: 1.0007... 0.1185 sec/batch\n", "Epoch: 11/20... Training Step: 4805... Training loss: 1.1843... 0.1248 sec/batch\n", "Epoch: 11/20... Training Step: 4806... Training loss: 1.0318... 0.1206 sec/batch\n", "Epoch: 11/20... Training Step: 4807... Training loss: 0.9896... 0.1269 sec/batch\n", "Epoch: 11/20... Training Step: 4808... Training loss: 1.2661... 0.1266 sec/batch\n", "Epoch: 11/20... Training Step: 4809... Training loss: 0.9427... 0.1264 sec/batch\n", "Epoch: 11/20... Training Step: 4810... Training loss: 1.1186... 0.1323 sec/batch\n", "Epoch: 11/20... Training Step: 4811... Training loss: 1.1462... 0.1232 sec/batch\n", "Epoch: 11/20... Training Step: 4812... Training loss: 1.3441... 0.1241 sec/batch\n", "Epoch: 11/20... Training Step: 4813... Training loss: 1.0161... 0.1240 sec/batch\n", "Epoch: 11/20... Training Step: 4814... 
Training loss: 1.1153... 0.1221 sec/batch\n", "Epoch: 11/20... Training Step: 4815... Training loss: 1.2902... 0.1254 sec/batch\n", "Epoch: 11/20... Training Step: 4816... Training loss: 0.9128... 0.1263 sec/batch\n", "Epoch: 11/20... Training Step: 4817... Training loss: 1.0423... 0.1315 sec/batch\n", "Epoch: 11/20... Training Step: 4818... Training loss: 1.2092... 0.1275 sec/batch\n", "Epoch: 11/20... Training Step: 4819... Training loss: 0.8835... 0.1276 sec/batch\n", "Epoch: 11/20... Training Step: 4820... Training loss: 1.1247... 0.1240 sec/batch\n", "Epoch: 11/20... Training Step: 4821... Training loss: 0.9866... 0.1320 sec/batch\n", "Epoch: 11/20... Training Step: 4822... Training loss: 1.2689... 0.1262 sec/batch\n", "Epoch: 11/20... Training Step: 4823... Training loss: 1.1629... 0.1333 sec/batch\n", "Epoch: 11/20... Training Step: 4824... Training loss: 1.1107... 0.1232 sec/batch\n", "Epoch: 11/20... Training Step: 4825... Training loss: 1.1944... 0.1270 sec/batch\n", "Epoch: 11/20... Training Step: 4826... Training loss: 1.0856... 0.1250 sec/batch\n", "Epoch: 11/20... Training Step: 4827... Training loss: 1.2338... 0.1293 sec/batch\n", "Epoch: 11/20... Training Step: 4828... Training loss: 0.9642... 0.1335 sec/batch\n", "Epoch: 11/20... Training Step: 4829... Training loss: 1.1981... 0.1326 sec/batch\n", "Epoch: 11/20... Training Step: 4830... Training loss: 1.0794... 0.1316 sec/batch\n", "Epoch: 11/20... Training Step: 4831... Training loss: 1.0407... 0.1341 sec/batch\n", "Epoch: 11/20... Training Step: 4832... Training loss: 1.1374... 0.1289 sec/batch\n", "Epoch: 11/20... Training Step: 4833... Training loss: 1.1122... 0.1229 sec/batch\n", "Epoch: 11/20... Training Step: 4834... Training loss: 1.0800... 0.1253 sec/batch\n", "Epoch: 11/20... Training Step: 4835... Training loss: 1.1373... 0.1355 sec/batch\n", "Epoch: 11/20... Training Step: 4836... Training loss: 1.1398... 0.1286 sec/batch\n", "Epoch: 11/20... Training Step: 4837... 
Training loss: 1.1156... 0.1211 sec/batch\n", "Epoch: 11/20... Training Step: 4838... Training loss: 1.2383... 0.1221 sec/batch\n", "Epoch: 11/20... Training Step: 4839... Training loss: 0.9259... 0.1417 sec/batch\n", "Epoch: 11/20... Training Step: 4840... Training loss: 1.1507... 0.1457 sec/batch\n", "Epoch: 11/20... Training Step: 4841... Training loss: 1.0600... 0.1292 sec/batch\n", "Epoch: 11/20... Training Step: 4842... Training loss: 1.1656... 0.1279 sec/batch\n", "Epoch: 11/20... Training Step: 4843... Training loss: 1.0673... 0.1206 sec/batch\n", "Epoch: 11/20... Training Step: 4844... Training loss: 1.1729... 0.1153 sec/batch\n", "Epoch: 11/20... Training Step: 4845... Training loss: 1.0391... 0.1160 sec/batch\n", "Epoch: 11/20... Training Step: 4846... Training loss: 1.0933... 0.1131 sec/batch\n", "Epoch: 11/20... Training Step: 4847... Training loss: 1.1472... 0.1207 sec/batch\n", "Epoch: 11/20... Training Step: 4848... Training loss: 1.1318... 0.1339 sec/batch\n", "Epoch: 11/20... Training Step: 4849... Training loss: 1.1397... 0.1247 sec/batch\n", "Epoch: 11/20... Training Step: 4850... Training loss: 0.8784... 0.1283 sec/batch\n", "Epoch: 11/20... Training Step: 4851... Training loss: 1.0675... 0.1284 sec/batch\n", "Epoch: 11/20... Training Step: 4852... Training loss: 1.1329... 0.1405 sec/batch\n", "Epoch: 11/20... Training Step: 4853... Training loss: 1.2140... 0.1228 sec/batch\n", "Epoch: 11/20... Training Step: 4854... Training loss: 1.1356... 0.1260 sec/batch\n", "Epoch: 11/20... Training Step: 4855... Training loss: 1.2288... 0.1262 sec/batch\n", "Epoch: 11/20... Training Step: 4856... Training loss: 1.0714... 0.1262 sec/batch\n", "Epoch: 11/20... Training Step: 4857... Training loss: 1.1303... 0.1291 sec/batch\n", "Epoch: 11/20... Training Step: 4858... Training loss: 1.0694... 0.1220 sec/batch\n", "Epoch: 11/20... Training Step: 4859... Training loss: 1.2721... 0.1183 sec/batch\n", "Epoch: 11/20... Training Step: 4860... 
Training loss: 1.1571... 0.1213 sec/batch\n", "Epoch: 11/20... Training Step: 4861... Training loss: 1.0495... 0.1200 sec/batch\n", "Epoch: 11/20... Training Step: 4862... Training loss: 1.4137... 0.1238 sec/batch\n", "Epoch: 11/20... Training Step: 4863... Training loss: 1.2239... 0.1278 sec/batch\n", "Epoch: 11/20... Training Step: 4864... Training loss: 1.3537... 0.1234 sec/batch\n", "Epoch: 11/20... Training Step: 4865... Training loss: 1.1408... 0.1143 sec/batch\n", "Epoch: 11/20... Training Step: 4866... Training loss: 1.1576... 0.1237 sec/batch\n", "Epoch: 11/20... Training Step: 4867... Training loss: 1.3145... 0.1311 sec/batch\n", "Epoch: 11/20... Training Step: 4868... Training loss: 1.0490... 0.1179 sec/batch\n", "Epoch: 11/20... Training Step: 4869... Training loss: 1.1972... 0.1247 sec/batch\n", "Epoch: 11/20... Training Step: 4870... Training loss: 1.0759... 0.1237 sec/batch\n", "Epoch: 11/20... Training Step: 4871... Training loss: 1.2380... 0.1209 sec/batch\n", "Epoch: 11/20... Training Step: 4872... Training loss: 1.1119... 0.1148 sec/batch\n", "Epoch: 11/20... Training Step: 4873... Training loss: 1.3718... 0.1215 sec/batch\n", "Epoch: 11/20... Training Step: 4874... Training loss: 1.0930... 0.1157 sec/batch\n", "Epoch: 11/20... Training Step: 4875... Training loss: 1.3889... 0.1182 sec/batch\n", "Epoch: 11/20... Training Step: 4876... Training loss: 1.1328... 0.1272 sec/batch\n", "Epoch: 11/20... Training Step: 4877... Training loss: 1.2214... 0.1303 sec/batch\n", "Epoch: 11/20... Training Step: 4878... Training loss: 1.0544... 0.1287 sec/batch\n", "Epoch: 11/20... Training Step: 4879... Training loss: 1.2240... 0.1281 sec/batch\n", "Epoch: 11/20... Training Step: 4880... Training loss: 1.1975... 0.1341 sec/batch\n", "Epoch: 11/20... Training Step: 4881... Training loss: 1.0868... 0.1267 sec/batch\n", "Epoch: 11/20... Training Step: 4882... Training loss: 0.9920... 0.1178 sec/batch\n", "Epoch: 11/20... Training Step: 4883... 
Training loss: 1.2159... 0.1250 sec/batch\n", "Epoch: 11/20... Training Step: 4884... Training loss: 1.2499... 0.1354 sec/batch\n", "Epoch: 11/20... Training Step: 4885... Training loss: 1.1721... 0.1196 sec/batch\n", "Epoch: 11/20... Training Step: 4886... Training loss: 1.0664... 0.1257 sec/batch\n", "Epoch: 11/20... Training Step: 4887... Training loss: 1.1064... 0.1249 sec/batch\n", "Epoch: 11/20... Training Step: 4888... Training loss: 1.3557... 0.1325 sec/batch\n", "Epoch: 11/20... Training Step: 4889... Training loss: 1.0984... 0.1239 sec/batch\n", "Epoch: 11/20... Training Step: 4890... Training loss: 1.1912... 0.1188 sec/batch\n", "Epoch: 11/20... Training Step: 4891... Training loss: 1.2272... 0.1246 sec/batch\n", "Epoch: 11/20... Training Step: 4892... Training loss: 1.1222... 0.1286 sec/batch\n", "Epoch: 11/20... Training Step: 4893... Training loss: 0.9849... 0.1315 sec/batch\n", "Epoch: 11/20... Training Step: 4894... Training loss: 1.1243... 0.1241 sec/batch\n", "Epoch: 11/20... Training Step: 4895... Training loss: 1.0579... 0.1216 sec/batch\n", "Epoch: 11/20... Training Step: 4896... Training loss: 1.1446... 0.1239 sec/batch\n", "Epoch: 11/20... Training Step: 4897... Training loss: 1.2571... 0.1255 sec/batch\n", "Epoch: 11/20... Training Step: 4898... Training loss: 1.0187... 0.1315 sec/batch\n", "Epoch: 11/20... Training Step: 4899... Training loss: 1.0706... 0.1278 sec/batch\n", "Epoch: 11/20... Training Step: 4900... Training loss: 1.0472... 0.1288 sec/batch\n", "Epoch: 11/20... Training Step: 4901... Training loss: 1.2050... 0.1305 sec/batch\n", "Epoch: 11/20... Training Step: 4902... Training loss: 1.1247... 0.1357 sec/batch\n", "Epoch: 11/20... Training Step: 4903... Training loss: 1.0568... 0.1241 sec/batch\n", "Epoch: 11/20... Training Step: 4904... Training loss: 1.1952... 0.1213 sec/batch\n", "Epoch: 11/20... Training Step: 4905... Training loss: 1.1611... 0.1257 sec/batch\n", "Epoch: 11/20... Training Step: 4906... 
Training loss: 1.1831... 0.1283 sec/batch\n", "Epoch: 11/20... Training Step: 4907... Training loss: 1.2288... 0.1233 sec/batch\n", "Epoch: 11/20... Training Step: 4908... Training loss: 1.2805... 0.1247 sec/batch\n", "Epoch: 11/20... Training Step: 4909... Training loss: 1.2931... 0.1287 sec/batch\n", "Epoch: 11/20... Training Step: 4910... Training loss: 1.1108... 0.1284 sec/batch\n", "Epoch: 11/20... Training Step: 4911... Training loss: 1.2995... 0.1222 sec/batch\n", "Epoch: 11/20... Training Step: 4912... Training loss: 1.1967... 0.1199 sec/batch\n", "Epoch: 11/20... Training Step: 4913... Training loss: 1.2149... 0.1194 sec/batch\n", "Epoch: 11/20... Training Step: 4914... Training loss: 1.2176... 0.1184 sec/batch\n", "Epoch: 11/20... Training Step: 4915... Training loss: 1.1471... 0.1116 sec/batch\n", "Epoch: 11/20... Training Step: 4916... Training loss: 1.1075... 0.1133 sec/batch\n", "Epoch: 11/20... Training Step: 4917... Training loss: 1.1894... 0.1258 sec/batch\n", "Epoch: 11/20... Training Step: 4918... Training loss: 1.5159... 0.1258 sec/batch\n", "Epoch: 11/20... Training Step: 4919... Training loss: 1.1788... 0.1279 sec/batch\n", "Epoch: 11/20... Training Step: 4920... Training loss: 1.1268... 0.1347 sec/batch\n", "Epoch: 11/20... Training Step: 4921... Training loss: 1.0580... 0.1261 sec/batch\n", "Epoch: 11/20... Training Step: 4922... Training loss: 1.1290... 0.1275 sec/batch\n", "Epoch: 11/20... Training Step: 4923... Training loss: 1.1377... 0.1224 sec/batch\n", "Epoch: 11/20... Training Step: 4924... Training loss: 1.1104... 0.1230 sec/batch\n", "Epoch: 11/20... Training Step: 4925... Training loss: 1.0183... 0.1223 sec/batch\n", "Epoch: 11/20... Training Step: 4926... Training loss: 1.1025... 0.1458 sec/batch\n", "Epoch: 11/20... Training Step: 4927... Training loss: 1.2140... 0.1243 sec/batch\n", "Epoch: 11/20... Training Step: 4928... Training loss: 1.3478... 0.1227 sec/batch\n", "Epoch: 11/20... Training Step: 4929... 
Training loss: 1.1146... 0.1332 sec/batch\n", "Epoch: 11/20... Training Step: 4930... Training loss: 1.1414... 0.1236 sec/batch\n", "Epoch: 11/20... Training Step: 4931... Training loss: 1.1390... 0.1322 sec/batch\n", "Epoch: 11/20... Training Step: 4932... Training loss: 1.0771... 0.1259 sec/batch\n", "Epoch: 11/20... Training Step: 4933... Training loss: 1.0427... 0.1221 sec/batch\n", "Epoch: 11/20... Training Step: 4934... Training loss: 1.0906... 0.1114 sec/batch\n", "Epoch: 11/20... Training Step: 4935... Training loss: 1.1365... 0.1123 sec/batch\n", "Epoch: 11/20... Training Step: 4936... Training loss: 1.3540... 0.1230 sec/batch\n", "Epoch: 11/20... Training Step: 4937... Training loss: 1.0467... 0.1256 sec/batch\n", "Epoch: 11/20... Training Step: 4938... Training loss: 1.0572... 0.1255 sec/batch\n", "Epoch: 11/20... Training Step: 4939... Training loss: 1.1151... 0.1278 sec/batch\n", "Epoch: 11/20... Training Step: 4940... Training loss: 1.2470... 0.1215 sec/batch\n", "Epoch: 11/20... Training Step: 4941... Training loss: 1.1244... 0.1216 sec/batch\n", "Epoch: 11/20... Training Step: 4942... Training loss: 1.1057... 0.1217 sec/batch\n", "Epoch: 11/20... Training Step: 4943... Training loss: 1.0041... 0.1241 sec/batch\n", "Epoch: 11/20... Training Step: 4944... Training loss: 1.4346... 0.1253 sec/batch\n", "Epoch: 11/20... Training Step: 4945... Training loss: 0.9307... 0.1245 sec/batch\n", "Epoch: 11/20... Training Step: 4946... Training loss: 1.1272... 0.1120 sec/batch\n", "Epoch: 11/20... Training Step: 4947... Training loss: 1.1968... 0.1230 sec/batch\n", "Epoch: 11/20... Training Step: 4948... Training loss: 1.5039... 0.1279 sec/batch\n", "Epoch: 11/20... Training Step: 4949... Training loss: 1.2444... 0.1275 sec/batch\n", "Epoch: 11/20... Training Step: 4950... Training loss: 1.2528... 0.1169 sec/batch\n", "Epoch: 11/20... Training Step: 4951... Training loss: 1.1908... 0.1244 sec/batch\n", "Epoch: 11/20... Training Step: 4952... 
Training loss: 0.9701... 0.1214 sec/batch\n", "Epoch: 11/20... Training Step: 4953... Training loss: 1.0428... 0.1176 sec/batch\n", "Epoch: 11/20... Training Step: 4954... Training loss: 0.9969... 0.1236 sec/batch\n", "Epoch: 11/20... Training Step: 4955... Training loss: 0.9004... 0.1194 sec/batch\n", "Epoch: 11/20... Training Step: 4956... Training loss: 0.9995... 0.1226 sec/batch\n", "Epoch: 11/20... Training Step: 4957... Training loss: 0.9840... 0.1205 sec/batch\n", "Epoch: 11/20... Training Step: 4958... Training loss: 0.9400... 0.1232 sec/batch\n", "Epoch: 11/20... Training Step: 4959... Training loss: 1.0455... 0.1254 sec/batch\n", "Epoch: 11/20... Training Step: 4960... Training loss: 1.0490... 0.1195 sec/batch\n", "Epoch: 11/20... Training Step: 4961... Training loss: 1.0894... 0.1136 sec/batch\n", "Epoch: 11/20... Training Step: 4962... Training loss: 1.2477... 0.1141 sec/batch\n", "Epoch: 11/20... Training Step: 4963... Training loss: 1.0198... 0.1121 sec/batch\n", "Epoch: 11/20... Training Step: 4964... Training loss: 1.0049... 0.1231 sec/batch\n", "Epoch: 11/20... Training Step: 4965... Training loss: 0.9652... 0.1273 sec/batch\n", "Epoch: 11/20... Training Step: 4966... Training loss: 1.0048... 0.1239 sec/batch\n", "Epoch: 11/20... Training Step: 4967... Training loss: 1.1590... 0.1257 sec/batch\n", "Epoch: 11/20... Training Step: 4968... Training loss: 0.9918... 0.1236 sec/batch\n", "Epoch: 11/20... Training Step: 4969... Training loss: 1.2386... 0.1283 sec/batch\n", "Epoch: 11/20... Training Step: 4970... Training loss: 1.0326... 0.1255 sec/batch\n", "Epoch: 11/20... Training Step: 4971... Training loss: 1.1362... 0.1257 sec/batch\n", "Epoch: 11/20... Training Step: 4972... Training loss: 1.1222... 0.1251 sec/batch\n", "Epoch: 11/20... Training Step: 4973... Training loss: 1.0147... 0.1252 sec/batch\n", "Epoch: 11/20... Training Step: 4974... Training loss: 0.9779... 0.1244 sec/batch\n", "Epoch: 11/20... Training Step: 4975... 
Training loss: 1.0798... 0.1229 sec/batch\n", "Epoch: 11/20... Training Step: 4976... Training loss: 1.0467... 0.1245 sec/batch\n", "Epoch: 11/20... Training Step: 4977... Training loss: 1.0238... 0.1211 sec/batch\n", "Epoch: 11/20... Training Step: 4978... Training loss: 1.0102... 0.1324 sec/batch\n", "Epoch: 11/20... Training Step: 4979... Training loss: 1.1478... 0.1273 sec/batch\n", "Epoch: 11/20... Training Step: 4980... Training loss: 1.0061... 0.1305 sec/batch\n", "Epoch: 11/20... Training Step: 4981... Training loss: 1.0986... 0.1289 sec/batch\n", "Epoch: 11/20... Training Step: 4982... Training loss: 1.2405... 0.1251 sec/batch\n", "Epoch: 11/20... Training Step: 4983... Training loss: 1.0040... 0.1235 sec/batch\n", "Epoch: 11/20... Training Step: 4984... Training loss: 1.2078... 0.1207 sec/batch\n", "Epoch: 11/20... Training Step: 4985... Training loss: 1.0444... 0.1225 sec/batch\n", "Epoch: 11/20... Training Step: 4986... Training loss: 1.0842... 0.1252 sec/batch\n", "Epoch: 11/20... Training Step: 4987... Training loss: 0.9174... 0.1249 sec/batch\n", "Epoch: 11/20... Training Step: 4988... Training loss: 1.3370... 0.1324 sec/batch\n", "Epoch: 11/20... Training Step: 4989... Training loss: 1.1495... 0.1221 sec/batch\n", "Epoch: 11/20... Training Step: 4990... Training loss: 1.0358... 0.1304 sec/batch\n", "Epoch: 11/20... Training Step: 4991... Training loss: 1.0640... 0.1270 sec/batch\n", "Epoch: 11/20... Training Step: 4992... Training loss: 1.1091... 0.1233 sec/batch\n", "Epoch: 11/20... Training Step: 4993... Training loss: 0.9694... 0.1298 sec/batch\n", "Epoch: 11/20... Training Step: 4994... Training loss: 0.8153... 0.1288 sec/batch\n", "Epoch: 11/20... Training Step: 4995... Training loss: 1.1708... 0.1253 sec/batch\n", "Epoch: 11/20... Training Step: 4996... Training loss: 1.1145... 0.1256 sec/batch\n", "Epoch: 11/20... Training Step: 4997... Training loss: 1.0493... 0.1348 sec/batch\n", "Epoch: 11/20... Training Step: 4998... 
Training loss: 1.1900... 0.1279 sec/batch\n", "Epoch: 11/20... Training Step: 4999... Training loss: 1.2238... 0.1262 sec/batch\n", "Epoch: 11/20... Training Step: 5000... Training loss: 0.9184... 0.1237 sec/batch\n", "Epoch: 11/20... Training Step: 5001... Training loss: 1.1696... 0.1173 sec/batch\n", "Epoch: 11/20... Training Step: 5002... Training loss: 1.1307... 0.1148 sec/batch\n", "Epoch: 11/20... Training Step: 5003... Training loss: 1.0353... 0.1166 sec/batch\n", "Epoch: 11/20... Training Step: 5004... Training loss: 1.0680... 0.1172 sec/batch\n", "Epoch: 11/20... Training Step: 5005... Training loss: 1.0882... 0.1158 sec/batch\n", "Epoch: 11/20... Training Step: 5006... Training loss: 1.1675... 0.1219 sec/batch\n", "Epoch: 11/20... Training Step: 5007... Training loss: 0.9894... 0.1195 sec/batch\n", "Epoch: 11/20... Training Step: 5008... Training loss: 1.2022... 0.1173 sec/batch\n", "Epoch: 11/20... Training Step: 5009... Training loss: 1.1112... 0.1218 sec/batch\n", "Epoch: 11/20... Training Step: 5010... Training loss: 1.0014... 0.1184 sec/batch\n", "Epoch: 11/20... Training Step: 5011... Training loss: 1.0114... 0.1204 sec/batch\n", "Epoch: 11/20... Training Step: 5012... Training loss: 1.1007... 0.1161 sec/batch\n", "Epoch: 11/20... Training Step: 5013... Training loss: 0.9593... 0.1160 sec/batch\n", "Epoch: 11/20... Training Step: 5014... Training loss: 1.3197... 0.1186 sec/batch\n", "Epoch: 11/20... Training Step: 5015... Training loss: 1.2819... 0.1129 sec/batch\n", "Epoch: 11/20... Training Step: 5016... Training loss: 1.1930... 0.1148 sec/batch\n", "Epoch: 11/20... Training Step: 5017... Training loss: 1.1820... 0.1178 sec/batch\n", "Epoch: 11/20... Training Step: 5018... Training loss: 1.2356... 0.1185 sec/batch\n", "Epoch: 11/20... Training Step: 5019... Training loss: 1.1693... 0.1130 sec/batch\n", "Epoch: 11/20... Training Step: 5020... Training loss: 1.1366... 0.1176 sec/batch\n", "Epoch: 11/20... Training Step: 5021... 
Training loss: 1.0466... 0.1143 sec/batch\n", "Epoch: 11/20... Training Step: 5022... Training loss: 1.0704... 0.1165 sec/batch\n", "Epoch: 11/20... Training Step: 5023... Training loss: 0.9570... 0.1175 sec/batch\n", "Epoch: 11/20... Training Step: 5024... Training loss: 1.2216... 0.1216 sec/batch\n", "Epoch: 11/20... Training Step: 5025... Training loss: 1.0085... 0.1175 sec/batch\n", "Epoch: 11/20... Training Step: 5026... Training loss: 1.2038... 0.1207 sec/batch\n", "Epoch: 11/20... Training Step: 5027... Training loss: 1.1117... 0.1197 sec/batch\n", "Epoch: 11/20... Training Step: 5028... Training loss: 0.9754... 0.1164 sec/batch\n", "Epoch: 11/20... Training Step: 5029... Training loss: 1.0205... 0.1147 sec/batch\n", "Epoch: 11/20... Training Step: 5030... Training loss: 1.1224... 0.1155 sec/batch\n", "Epoch: 11/20... Training Step: 5031... Training loss: 1.0367... 0.1184 sec/batch\n", "Epoch: 11/20... Training Step: 5032... Training loss: 0.9474... 0.1269 sec/batch\n", "Epoch: 11/20... Training Step: 5033... Training loss: 1.1800... 0.1241 sec/batch\n", "Epoch: 11/20... Training Step: 5034... Training loss: 1.0170... 0.1214 sec/batch\n", "Epoch: 11/20... Training Step: 5035... Training loss: 1.0606... 0.1258 sec/batch\n", "Epoch: 11/20... Training Step: 5036... Training loss: 1.4241... 0.1249 sec/batch\n", "Epoch: 11/20... Training Step: 5037... Training loss: 1.0868... 0.1291 sec/batch\n", "Epoch: 11/20... Training Step: 5038... Training loss: 1.0705... 0.1230 sec/batch\n", "Epoch: 11/20... Training Step: 5039... Training loss: 1.1233... 0.1226 sec/batch\n", "Epoch: 11/20... Training Step: 5040... Training loss: 1.0278... 0.1310 sec/batch\n", "Epoch: 11/20... Training Step: 5041... Training loss: 0.9417... 0.1253 sec/batch\n", "Epoch: 11/20... Training Step: 5042... Training loss: 0.9718... 0.1261 sec/batch\n", "Epoch: 11/20... Training Step: 5043... Training loss: 1.0202... 0.1270 sec/batch\n", "Epoch: 11/20... Training Step: 5044... 
Training loss: 1.2210... 0.1249 sec/batch\n", "Epoch: 11/20... Training Step: 5045... Training loss: 1.1368... 0.1174 sec/batch\n", "Epoch: 11/20... Training Step: 5046... Training loss: 1.1775... 0.1181 sec/batch\n", "Epoch: 11/20... Training Step: 5047... Training loss: 1.0307... 0.1283 sec/batch\n", "Epoch: 11/20... Training Step: 5048... Training loss: 1.3948... 0.1295 sec/batch\n", "Epoch: 11/20... Training Step: 5049... Training loss: 1.0848... 0.1269 sec/batch\n", "Epoch: 11/20... Training Step: 5050... Training loss: 1.0328... 0.1424 sec/batch\n", "Epoch: 11/20... Training Step: 5051... Training loss: 1.0617... 0.1316 sec/batch\n", "Epoch: 11/20... Training Step: 5052... Training loss: 1.1103... 0.1366 sec/batch\n", "Epoch: 11/20... Training Step: 5053... Training loss: 1.2257... 0.1282 sec/batch\n", "Epoch: 11/20... Training Step: 5054... Training loss: 1.1041... 0.1292 sec/batch\n", "Epoch: 11/20... Training Step: 5055... Training loss: 1.1635... 0.1258 sec/batch\n", "Epoch: 11/20... Training Step: 5056... Training loss: 1.4214... 0.1432 sec/batch\n", "Epoch: 11/20... Training Step: 5057... Training loss: 1.2003... 0.1333 sec/batch\n", "Epoch: 11/20... Training Step: 5058... Training loss: 0.9734... 0.1285 sec/batch\n", "Epoch: 11/20... Training Step: 5059... Training loss: 1.0361... 0.1191 sec/batch\n", "Epoch: 11/20... Training Step: 5060... Training loss: 0.9972... 0.1200 sec/batch\n", "Epoch: 11/20... Training Step: 5061... Training loss: 1.0632... 0.1340 sec/batch\n", "Epoch: 11/20... Training Step: 5062... Training loss: 1.1002... 0.1298 sec/batch\n", "Epoch: 11/20... Training Step: 5063... Training loss: 1.2750... 0.1322 sec/batch\n", "Epoch: 11/20... Training Step: 5064... Training loss: 1.2022... 0.1275 sec/batch\n", "Epoch: 11/20... Training Step: 5065... Training loss: 1.0809... 0.1269 sec/batch\n", "Epoch: 11/20... Training Step: 5066... Training loss: 1.0754... 0.1181 sec/batch\n", "Epoch: 11/20... Training Step: 5067... 
Training loss: 1.2190... 0.1236 sec/batch\n", "Epoch: 11/20... Training Step: 5068... Training loss: 1.0759... 0.1184 sec/batch\n", "Epoch: 11/20... Training Step: 5069... Training loss: 1.0237... 0.1200 sec/batch\n", "Epoch: 11/20... Training Step: 5070... Training loss: 1.0702... 0.1205 sec/batch\n", "Epoch: 11/20... Training Step: 5071... Training loss: 1.3340... 0.1307 sec/batch\n", "Epoch: 11/20... Training Step: 5072... Training loss: 1.0133... 0.1273 sec/batch\n", "Epoch: 11/20... Training Step: 5073... Training loss: 1.3138... 0.1238 sec/batch\n", "Epoch: 11/20... Training Step: 5074... Training loss: 1.1220... 0.1293 sec/batch\n", "Epoch: 11/20... Training Step: 5075... Training loss: 1.1279... 0.1217 sec/batch\n", "Epoch: 11/20... Training Step: 5076... Training loss: 1.1375... 0.1245 sec/batch\n", "Epoch: 11/20... Training Step: 5077... Training loss: 0.9701... 0.1204 sec/batch\n", "Epoch: 11/20... Training Step: 5078... Training loss: 1.1495... 0.1228 sec/batch\n", "Epoch: 11/20... Training Step: 5079... Training loss: 1.3327... 0.1343 sec/batch\n", "Epoch: 11/20... Training Step: 5080... Training loss: 1.3571... 0.1233 sec/batch\n", "Epoch: 11/20... Training Step: 5081... Training loss: 1.0410... 0.1242 sec/batch\n", "Epoch: 11/20... Training Step: 5082... Training loss: 1.1000... 0.1171 sec/batch\n", "Epoch: 11/20... Training Step: 5083... Training loss: 1.0846... 0.1179 sec/batch\n", "Epoch: 11/20... Training Step: 5084... Training loss: 1.1892... 0.1182 sec/batch\n", "Epoch: 11/20... Training Step: 5085... Training loss: 1.0498... 0.1205 sec/batch\n", "Epoch: 11/20... Training Step: 5086... Training loss: 1.0820... 0.1208 sec/batch\n", "Epoch: 11/20... Training Step: 5087... Training loss: 1.0856... 0.1223 sec/batch\n", "Epoch: 11/20... Training Step: 5088... Training loss: 0.9303... 0.1168 sec/batch\n", "Epoch: 11/20... Training Step: 5089... Training loss: 1.1110... 0.1132 sec/batch\n", "Epoch: 11/20... Training Step: 5090... 
Training loss: 1.1511... 0.1177 sec/batch\n", "Epoch: 11/20... Training Step: 5091... Training loss: 1.1148... 0.1166 sec/batch\n", "Epoch: 11/20... Training Step: 5092... Training loss: 1.1982... 0.1184 sec/batch\n", "Epoch: 11/20... Training Step: 5093... Training loss: 0.9776... 0.1186 sec/batch\n", "Epoch: 11/20... Training Step: 5094... Training loss: 1.3615... 0.1158 sec/batch\n", "Epoch: 11/20... Training Step: 5095... Training loss: 1.1975... 0.1182 sec/batch\n", "Epoch: 11/20... Training Step: 5096... Training loss: 0.9654... 0.1131 sec/batch\n", "Epoch: 11/20... Training Step: 5097... Training loss: 0.9458... 0.1175 sec/batch\n", "Epoch: 11/20... Training Step: 5098... Training loss: 0.8823... 0.1153 sec/batch\n", "Epoch: 11/20... Training Step: 5099... Training loss: 0.9578... 0.1170 sec/batch\n", "Epoch: 11/20... Training Step: 5100... Training loss: 1.1142... 0.1096 sec/batch\n", "Epoch: 11/20... Training Step: 5101... Training loss: 1.1936... 0.1156 sec/batch\n", "Epoch: 11/20... Training Step: 5102... Training loss: 1.0961... 0.1151 sec/batch\n", "Epoch: 11/20... Training Step: 5103... Training loss: 1.1698... 0.1222 sec/batch\n", "Epoch: 11/20... Training Step: 5104... Training loss: 1.0350... 0.1252 sec/batch\n", "Epoch: 12/20... Training Step: 5105... Training loss: 1.4142... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5106... Training loss: 1.1721... 0.1151 sec/batch\n", "Epoch: 12/20... Training Step: 5107... Training loss: 1.0289... 0.1172 sec/batch\n", "Epoch: 12/20... Training Step: 5108... Training loss: 1.1744... 0.1161 sec/batch\n", "Epoch: 12/20... Training Step: 5109... Training loss: 1.2136... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5110... Training loss: 0.9706... 0.1127 sec/batch\n", "Epoch: 12/20... Training Step: 5111... Training loss: 1.1469... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5112... Training loss: 1.0269... 0.1200 sec/batch\n", "Epoch: 12/20... Training Step: 5113... 
Training loss: 0.9189... 0.1158 sec/batch\n", "Epoch: 12/20... Training Step: 5114... Training loss: 1.0982... 0.1151 sec/batch\n", "Epoch: 12/20... Training Step: 5115... Training loss: 1.0628... 0.1134 sec/batch\n", "Epoch: 12/20... Training Step: 5116... Training loss: 0.9487... 0.1161 sec/batch\n", "Epoch: 12/20... Training Step: 5117... Training loss: 1.2776... 0.1203 sec/batch\n", "Epoch: 12/20... Training Step: 5118... Training loss: 0.8961... 0.1111 sec/batch\n", "Epoch: 12/20... Training Step: 5119... Training loss: 1.1663... 0.1220 sec/batch\n", "Epoch: 12/20... Training Step: 5120... Training loss: 1.2996... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5121... Training loss: 0.9345... 0.1133 sec/batch\n", "Epoch: 12/20... Training Step: 5122... Training loss: 1.0150... 0.1156 sec/batch\n", "Epoch: 12/20... Training Step: 5123... Training loss: 1.0259... 0.1188 sec/batch\n", "Epoch: 12/20... Training Step: 5124... Training loss: 0.9745... 0.1162 sec/batch\n", "Epoch: 12/20... Training Step: 5125... Training loss: 1.1571... 0.1213 sec/batch\n", "Epoch: 12/20... Training Step: 5126... Training loss: 1.0914... 0.1194 sec/batch\n", "Epoch: 12/20... Training Step: 5127... Training loss: 1.1719... 0.1215 sec/batch\n", "Epoch: 12/20... Training Step: 5128... Training loss: 1.1430... 0.1209 sec/batch\n", "Epoch: 12/20... Training Step: 5129... Training loss: 1.0628... 0.1261 sec/batch\n", "Epoch: 12/20... Training Step: 5130... Training loss: 1.1231... 0.1153 sec/batch\n", "Epoch: 12/20... Training Step: 5131... Training loss: 1.2585... 0.1209 sec/batch\n", "Epoch: 12/20... Training Step: 5132... Training loss: 0.9396... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5133... Training loss: 0.9057... 0.1191 sec/batch\n", "Epoch: 12/20... Training Step: 5134... Training loss: 1.0739... 0.1143 sec/batch\n", "Epoch: 12/20... Training Step: 5135... Training loss: 0.9512... 0.1117 sec/batch\n", "Epoch: 12/20... Training Step: 5136... 
Training loss: 1.1012... 0.1171 sec/batch\n", "Epoch: 12/20... Training Step: 5137... Training loss: 1.0350... 0.1178 sec/batch\n", "Epoch: 12/20... Training Step: 5138... Training loss: 0.9233... 0.1234 sec/batch\n", "Epoch: 12/20... Training Step: 5139... Training loss: 0.8927... 0.1169 sec/batch\n", "Epoch: 12/20... Training Step: 5140... Training loss: 0.9756... 0.1196 sec/batch\n", "Epoch: 12/20... Training Step: 5141... Training loss: 1.0965... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5142... Training loss: 0.9747... 0.1152 sec/batch\n", "Epoch: 12/20... Training Step: 5143... Training loss: 1.0009... 0.1143 sec/batch\n", "Epoch: 12/20... Training Step: 5144... Training loss: 1.4048... 0.1173 sec/batch\n", "Epoch: 12/20... Training Step: 5145... Training loss: 1.0809... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5146... Training loss: 1.0230... 0.1214 sec/batch\n", "Epoch: 12/20... Training Step: 5147... Training loss: 1.1283... 0.1202 sec/batch\n", "Epoch: 12/20... Training Step: 5148... Training loss: 0.9043... 0.1196 sec/batch\n", "Epoch: 12/20... Training Step: 5149... Training loss: 1.0910... 0.1166 sec/batch\n", "Epoch: 12/20... Training Step: 5150... Training loss: 1.0659... 0.1150 sec/batch\n", "Epoch: 12/20... Training Step: 5151... Training loss: 1.0197... 0.1193 sec/batch\n", "Epoch: 12/20... Training Step: 5152... Training loss: 1.0000... 0.1198 sec/batch\n", "Epoch: 12/20... Training Step: 5153... Training loss: 1.0132... 0.1153 sec/batch\n", "Epoch: 12/20... Training Step: 5154... Training loss: 1.0408... 0.1155 sec/batch\n", "Epoch: 12/20... Training Step: 5155... Training loss: 1.1277... 0.1204 sec/batch\n", "Epoch: 12/20... Training Step: 5156... Training loss: 1.1061... 0.1152 sec/batch\n", "Epoch: 12/20... Training Step: 5157... Training loss: 1.0119... 0.1151 sec/batch\n", "Epoch: 12/20... Training Step: 5158... Training loss: 1.0702... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5159... 
Training loss: 0.9235... 0.1202 sec/batch\n", "Epoch: 12/20... Training Step: 5160... Training loss: 1.0441... 0.1190 sec/batch\n", "Epoch: 12/20... Training Step: 5161... Training loss: 0.9708... 0.1214 sec/batch\n", "Epoch: 12/20... Training Step: 5162... Training loss: 1.0648... 0.1180 sec/batch\n", "Epoch: 12/20... Training Step: 5163... Training loss: 0.8983... 0.1172 sec/batch\n", "Epoch: 12/20... Training Step: 5164... Training loss: 0.9960... 0.1147 sec/batch\n", "Epoch: 12/20... Training Step: 5165... Training loss: 0.8723... 0.1210 sec/batch\n", "Epoch: 12/20... Training Step: 5166... Training loss: 1.1072... 0.1189 sec/batch\n", "Epoch: 12/20... Training Step: 5167... Training loss: 0.9797... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5168... Training loss: 1.1227... 0.1222 sec/batch\n", "Epoch: 12/20... Training Step: 5169... Training loss: 1.0150... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5170... Training loss: 1.1242... 0.1164 sec/batch\n", "Epoch: 12/20... Training Step: 5171... Training loss: 1.0613... 0.1193 sec/batch\n", "Epoch: 12/20... Training Step: 5172... Training loss: 1.1191... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5173... Training loss: 0.9786... 0.1185 sec/batch\n", "Epoch: 12/20... Training Step: 5174... Training loss: 1.1233... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5175... Training loss: 1.1962... 0.1128 sec/batch\n", "Epoch: 12/20... Training Step: 5176... Training loss: 0.9027... 0.1171 sec/batch\n", "Epoch: 12/20... Training Step: 5177... Training loss: 1.0632... 0.1164 sec/batch\n", "Epoch: 12/20... Training Step: 5178... Training loss: 0.8529... 0.1178 sec/batch\n", "Epoch: 12/20... Training Step: 5179... Training loss: 1.2325... 0.1185 sec/batch\n", "Epoch: 12/20... Training Step: 5180... Training loss: 0.9724... 0.1193 sec/batch\n", "Epoch: 12/20... Training Step: 5181... Training loss: 1.1633... 0.1214 sec/batch\n", "Epoch: 12/20... Training Step: 5182... 
Training loss: 1.0489... 0.1151 sec/batch\n", "Epoch: 12/20... Training Step: 5183... Training loss: 1.1474... 0.1147 sec/batch\n", "Epoch: 12/20... Training Step: 5184... Training loss: 0.9846... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5185... Training loss: 1.1437... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5186... Training loss: 0.9682... 0.1201 sec/batch\n", "Epoch: 12/20... Training Step: 5187... Training loss: 0.9544... 0.1203 sec/batch\n", "Epoch: 12/20... Training Step: 5188... Training loss: 1.2107... 0.1146 sec/batch\n", "Epoch: 12/20... Training Step: 5189... Training loss: 1.0264... 0.1180 sec/batch\n", "Epoch: 12/20... Training Step: 5190... Training loss: 1.2433... 0.1152 sec/batch\n", "Epoch: 12/20... Training Step: 5191... Training loss: 1.0571... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5192... Training loss: 1.2446... 0.1156 sec/batch\n", "Epoch: 12/20... Training Step: 5193... Training loss: 1.1871... 0.1134 sec/batch\n", "Epoch: 12/20... Training Step: 5194... Training loss: 1.1907... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5195... Training loss: 1.2205... 0.1214 sec/batch\n", "Epoch: 12/20... Training Step: 5196... Training loss: 1.1104... 0.1199 sec/batch\n", "Epoch: 12/20... Training Step: 5197... Training loss: 0.9431... 0.1169 sec/batch\n", "Epoch: 12/20... Training Step: 5198... Training loss: 1.2025... 0.1147 sec/batch\n", "Epoch: 12/20... Training Step: 5199... Training loss: 1.1489... 0.1172 sec/batch\n", "Epoch: 12/20... Training Step: 5200... Training loss: 1.1071... 0.1127 sec/batch\n", "Epoch: 12/20... Training Step: 5201... Training loss: 1.4176... 0.1174 sec/batch\n", "Epoch: 12/20... Training Step: 5202... Training loss: 1.0926... 0.1192 sec/batch\n", "Epoch: 12/20... Training Step: 5203... Training loss: 1.2379... 0.1168 sec/batch\n", "Epoch: 12/20... Training Step: 5204... Training loss: 1.0813... 0.1200 sec/batch\n", "Epoch: 12/20... Training Step: 5205... 
Training loss: 1.1806... 0.1142 sec/batch\n", "Epoch: 12/20... Training Step: 5206... Training loss: 1.2245... 0.1136 sec/batch\n", "Epoch: 12/20... Training Step: 5207... Training loss: 1.1935... 0.1191 sec/batch\n", "Epoch: 12/20... Training Step: 5208... Training loss: 1.1219... 0.1172 sec/batch\n", "Epoch: 12/20... Training Step: 5209... Training loss: 1.0549... 0.1209 sec/batch\n", "Epoch: 12/20... Training Step: 5210... Training loss: 1.2464... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5211... Training loss: 0.9978... 0.1169 sec/batch\n", "Epoch: 12/20... Training Step: 5212... Training loss: 1.1931... 0.1154 sec/batch\n", "Epoch: 12/20... Training Step: 5213... Training loss: 1.1989... 0.1134 sec/batch\n", "Epoch: 12/20... Training Step: 5214... Training loss: 1.0488... 0.1180 sec/batch\n", "Epoch: 12/20... Training Step: 5215... Training loss: 1.0849... 0.1226 sec/batch\n", "Epoch: 12/20... Training Step: 5216... Training loss: 0.9717... 0.1206 sec/batch\n", "Epoch: 12/20... Training Step: 5217... Training loss: 1.0430... 0.1125 sec/batch\n", "Epoch: 12/20... Training Step: 5218... Training loss: 1.1636... 0.1180 sec/batch\n", "Epoch: 12/20... Training Step: 5219... Training loss: 1.2120... 0.1115 sec/batch\n", "Epoch: 12/20... Training Step: 5220... Training loss: 1.0625... 0.1173 sec/batch\n", "Epoch: 12/20... Training Step: 5221... Training loss: 1.1903... 0.1168 sec/batch\n", "Epoch: 12/20... Training Step: 5222... Training loss: 1.0665... 0.1186 sec/batch\n", "Epoch: 12/20... Training Step: 5223... Training loss: 1.0429... 0.1126 sec/batch\n", "Epoch: 12/20... Training Step: 5224... Training loss: 0.9700... 0.1167 sec/batch\n", "Epoch: 12/20... Training Step: 5225... Training loss: 1.1898... 0.1177 sec/batch\n", "Epoch: 12/20... Training Step: 5226... Training loss: 1.1240... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5227... Training loss: 1.1615... 0.1179 sec/batch\n", "Epoch: 12/20... Training Step: 5228... 
Training loss: 1.1693... 0.1176 sec/batch\n", "Epoch: 12/20... Training Step: 5229... Training loss: 1.1489... 0.1164 sec/batch\n", "Epoch: 12/20... Training Step: 5230... Training loss: 0.9618... 0.1172 sec/batch\n", "Epoch: 12/20... Training Step: 5231... Training loss: 1.0733... 0.1236 sec/batch\n", "Epoch: 12/20... Training Step: 5232... Training loss: 1.1840... 0.1203 sec/batch\n", "Epoch: 12/20... Training Step: 5233... Training loss: 1.0688... 0.1188 sec/batch\n", "Epoch: 12/20... Training Step: 5234... Training loss: 1.0007... 0.1178 sec/batch\n", "Epoch: 12/20... Training Step: 5235... Training loss: 1.3750... 0.1159 sec/batch\n", "Epoch: 12/20... Training Step: 5236... Training loss: 1.0796... 0.1198 sec/batch\n", "Epoch: 12/20... Training Step: 5237... Training loss: 1.0814... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5238... Training loss: 1.2044... 0.1197 sec/batch\n", "Epoch: 12/20... Training Step: 5239... Training loss: 1.0451... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5240... Training loss: 0.9268... 0.1218 sec/batch\n", "Epoch: 12/20... Training Step: 5241... Training loss: 0.9755... 0.1183 sec/batch\n", "Epoch: 12/20... Training Step: 5242... Training loss: 1.1974... 0.1199 sec/batch\n", "Epoch: 12/20... Training Step: 5243... Training loss: 1.0494... 0.1193 sec/batch\n", "Epoch: 12/20... Training Step: 5244... Training loss: 1.3648... 0.1192 sec/batch\n", "Epoch: 12/20... Training Step: 5245... Training loss: 0.9793... 0.1220 sec/batch\n", "Epoch: 12/20... Training Step: 5246... Training loss: 1.0033... 0.1176 sec/batch\n", "Epoch: 12/20... Training Step: 5247... Training loss: 0.9796... 0.1155 sec/batch\n", "Epoch: 12/20... Training Step: 5248... Training loss: 1.0842... 0.1201 sec/batch\n", "Epoch: 12/20... Training Step: 5249... Training loss: 1.0875... 0.1113 sec/batch\n", "Epoch: 12/20... Training Step: 5250... Training loss: 0.9786... 0.1207 sec/batch\n", "Epoch: 12/20... Training Step: 5251... 
Training loss: 1.0157... 0.1193 sec/batch\n", "Epoch: 12/20... Training Step: 5252... Training loss: 1.0133... 0.1186 sec/batch\n", "Epoch: 12/20... Training Step: 5253... Training loss: 1.0435... 0.1186 sec/batch\n", "Epoch: 12/20... Training Step: 5254... Training loss: 1.1882... 0.1183 sec/batch\n", "Epoch: 12/20... Training Step: 5255... Training loss: 1.0183... 0.1163 sec/batch\n", "Epoch: 12/20... Training Step: 5256... Training loss: 1.0762... 0.1217 sec/batch\n", "Epoch: 12/20... Training Step: 5257... Training loss: 1.3494... 0.1219 sec/batch\n", "Epoch: 12/20... Training Step: 5258... Training loss: 1.0157... 0.1142 sec/batch\n", "Epoch: 12/20... Training Step: 5259... Training loss: 1.0496... 0.1193 sec/batch\n", "Epoch: 12/20... Training Step: 5260... Training loss: 1.1749... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5261... Training loss: 1.0177... 0.1153 sec/batch\n", "Epoch: 12/20... Training Step: 5262... Training loss: 1.0962... 0.1129 sec/batch\n", "Epoch: 12/20... Training Step: 5263... Training loss: 0.9087... 0.1199 sec/batch\n", "Epoch: 12/20... Training Step: 5264... Training loss: 1.0270... 0.1216 sec/batch\n", "Epoch: 12/20... Training Step: 5265... Training loss: 1.1752... 0.1228 sec/batch\n", "Epoch: 12/20... Training Step: 5266... Training loss: 0.9844... 0.1194 sec/batch\n", "Epoch: 12/20... Training Step: 5267... Training loss: 1.3654... 0.1198 sec/batch\n", "Epoch: 12/20... Training Step: 5268... Training loss: 0.9639... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5269... Training loss: 1.0557... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5270... Training loss: 1.0558... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5271... Training loss: 0.9249... 0.1163 sec/batch\n", "Epoch: 12/20... Training Step: 5272... Training loss: 1.1500... 0.1174 sec/batch\n", "Epoch: 12/20... Training Step: 5273... Training loss: 1.0205... 0.1188 sec/batch\n", "Epoch: 12/20... Training Step: 5274... 
Training loss: 1.1874... 0.1159 sec/batch\n", "Epoch: 12/20... Training Step: 5275... Training loss: 1.1283... 0.1152 sec/batch\n", "Epoch: 12/20... Training Step: 5276... Training loss: 1.1485... 0.1124 sec/batch\n", "Epoch: 12/20... Training Step: 5277... Training loss: 1.0996... 0.1163 sec/batch\n", "Epoch: 12/20... Training Step: 5278... Training loss: 1.0358... 0.1157 sec/batch\n", "Epoch: 12/20... Training Step: 5279... Training loss: 1.2695... 0.1127 sec/batch\n", "Epoch: 12/20... Training Step: 5280... Training loss: 0.8548... 0.1154 sec/batch\n", "Epoch: 12/20... Training Step: 5281... Training loss: 0.9665... 0.1211 sec/batch\n", "Epoch: 12/20... Training Step: 5282... Training loss: 1.2284... 0.1194 sec/batch\n", "Epoch: 12/20... Training Step: 5283... Training loss: 0.9621... 0.1160 sec/batch\n", "Epoch: 12/20... Training Step: 5284... Training loss: 1.0711... 0.1186 sec/batch\n", "Epoch: 12/20... Training Step: 5285... Training loss: 0.8494... 0.1229 sec/batch\n", "Epoch: 12/20... Training Step: 5286... Training loss: 1.1662... 0.1148 sec/batch\n", "Epoch: 12/20... Training Step: 5287... Training loss: 1.1658... 0.1178 sec/batch\n", "Epoch: 12/20... Training Step: 5288... Training loss: 1.0719... 0.1200 sec/batch\n", "Epoch: 12/20... Training Step: 5289... Training loss: 1.1881... 0.1176 sec/batch\n", "Epoch: 12/20... Training Step: 5290... Training loss: 1.1373... 0.1230 sec/batch\n", "Epoch: 12/20... Training Step: 5291... Training loss: 1.2057... 0.1171 sec/batch\n", "Epoch: 12/20... Training Step: 5292... Training loss: 0.9799... 0.1183 sec/batch\n", "Epoch: 12/20... Training Step: 5293... Training loss: 1.1890... 0.1127 sec/batch\n", "Epoch: 12/20... Training Step: 5294... Training loss: 0.9925... 0.1158 sec/batch\n", "Epoch: 12/20... Training Step: 5295... Training loss: 1.0576... 0.1189 sec/batch\n", "Epoch: 12/20... Training Step: 5296... Training loss: 1.1865... 0.1133 sec/batch\n", "Epoch: 12/20... Training Step: 5297... 
Training loss: 1.0959... 0.1162 sec/batch\n", "Epoch: 12/20... Training Step: 5298... Training loss: 1.0166... 0.1209 sec/batch\n", "Epoch: 12/20... Training Step: 5299... Training loss: 1.1671... 0.1147 sec/batch\n", "Epoch: 12/20... Training Step: 5300... Training loss: 1.1471... 0.1147 sec/batch\n", "Epoch: 12/20... Training Step: 5301... Training loss: 1.0791... 0.1192 sec/batch\n", "Epoch: 12/20... Training Step: 5302... Training loss: 1.0793... 0.1174 sec/batch\n", "Epoch: 12/20... Training Step: 5303... Training loss: 0.8736... 0.1243 sec/batch\n", "Epoch: 12/20... Training Step: 5304... Training loss: 1.0324... 0.1248 sec/batch\n", "Epoch: 12/20... Training Step: 5305... Training loss: 1.0305... 0.1223 sec/batch\n", "Epoch: 12/20... Training Step: 5306... Training loss: 1.1022... 0.1118 sec/batch\n", "Epoch: 12/20... Training Step: 5307... Training loss: 0.9932... 0.1239 sec/batch\n", "Epoch: 12/20... Training Step: 5308... Training loss: 1.2179... 0.1157 sec/batch\n", "Epoch: 12/20... Training Step: 5309... Training loss: 0.9815... 0.1178 sec/batch\n", "Epoch: 12/20... Training Step: 5310... Training loss: 0.9831... 0.1215 sec/batch\n", "Epoch: 12/20... Training Step: 5311... Training loss: 1.0817... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5312... Training loss: 1.2052... 0.1115 sec/batch\n", "Epoch: 12/20... Training Step: 5313... Training loss: 1.1035... 0.1155 sec/batch\n", "Epoch: 12/20... Training Step: 5314... Training loss: 1.0144... 0.1211 sec/batch\n", "Epoch: 12/20... Training Step: 5315... Training loss: 0.8852... 0.1217 sec/batch\n", "Epoch: 12/20... Training Step: 5316... Training loss: 1.1755... 0.1203 sec/batch\n", "Epoch: 12/20... Training Step: 5317... Training loss: 1.2227... 0.1202 sec/batch\n", "Epoch: 12/20... Training Step: 5318... Training loss: 1.0826... 0.1397 sec/batch\n", "Epoch: 12/20... Training Step: 5319... Training loss: 1.1640... 0.1708 sec/batch\n", "Epoch: 12/20... Training Step: 5320... 
Training loss: 1.1022... 0.1607 sec/batch\n", "Epoch: 12/20... Training Step: 5321... Training loss: 1.1221... 0.1442 sec/batch\n", "Epoch: 12/20... Training Step: 5322... Training loss: 1.1403... 0.1339 sec/batch\n", "Epoch: 12/20... Training Step: 5323... Training loss: 1.2717... 0.1267 sec/batch\n", "Epoch: 12/20... Training Step: 5324... Training loss: 1.2186... 0.1289 sec/batch\n", "Epoch: 12/20... Training Step: 5325... Training loss: 1.1091... 0.1265 sec/batch\n", "Epoch: 12/20... Training Step: 5326... Training loss: 1.4322... 0.1233 sec/batch\n", "Epoch: 12/20... Training Step: 5327... Training loss: 1.1888... 0.1211 sec/batch\n", "Epoch: 12/20... Training Step: 5328... Training loss: 1.2495... 0.1288 sec/batch\n", "Epoch: 12/20... Training Step: 5329... Training loss: 1.1285... 0.1269 sec/batch\n", "Epoch: 12/20... Training Step: 5330... Training loss: 1.2364... 0.1254 sec/batch\n", "Epoch: 12/20... Training Step: 5331... Training loss: 1.1506... 0.1232 sec/batch\n", "Epoch: 12/20... Training Step: 5332... Training loss: 1.0002... 0.1168 sec/batch\n", "Epoch: 12/20... Training Step: 5333... Training loss: 1.1027... 0.1169 sec/batch\n", "Epoch: 12/20... Training Step: 5334... Training loss: 1.0917... 0.1146 sec/batch\n", "Epoch: 12/20... Training Step: 5335... Training loss: 1.1348... 0.1140 sec/batch\n", "Epoch: 12/20... Training Step: 5336... Training loss: 1.0260... 0.1188 sec/batch\n", "Epoch: 12/20... Training Step: 5337... Training loss: 1.3320... 0.1173 sec/batch\n", "Epoch: 12/20... Training Step: 5338... Training loss: 1.0997... 0.1205 sec/batch\n", "Epoch: 12/20... Training Step: 5339... Training loss: 1.3327... 0.1185 sec/batch\n", "Epoch: 12/20... Training Step: 5340... Training loss: 1.0975... 0.1226 sec/batch\n", "Epoch: 12/20... Training Step: 5341... Training loss: 1.1783... 0.1210 sec/batch\n", "Epoch: 12/20... Training Step: 5342... Training loss: 1.0085... 0.1161 sec/batch\n", "Epoch: 12/20... Training Step: 5343... 
Training loss: 1.2126... 0.1148 sec/batch\n", "Epoch: 12/20... Training Step: 5344... Training loss: 1.2089... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5345... Training loss: 1.1329... 0.1190 sec/batch\n", "Epoch: 12/20... Training Step: 5346... Training loss: 1.0562... 0.1147 sec/batch\n", "Epoch: 12/20... Training Step: 5347... Training loss: 1.2914... 0.1198 sec/batch\n", "Epoch: 12/20... Training Step: 5348... Training loss: 1.1848... 0.1235 sec/batch\n", "Epoch: 12/20... Training Step: 5349... Training loss: 1.1234... 0.1209 sec/batch\n", "Epoch: 12/20... Training Step: 5350... Training loss: 1.0085... 0.1300 sec/batch\n", "Epoch: 12/20... Training Step: 5351... Training loss: 1.0727... 0.1244 sec/batch\n", "Epoch: 12/20... Training Step: 5352... Training loss: 1.2099... 0.1318 sec/batch\n", "Epoch: 12/20... Training Step: 5353... Training loss: 1.2048... 0.1301 sec/batch\n", "Epoch: 12/20... Training Step: 5354... Training loss: 1.2328... 0.1207 sec/batch\n", "Epoch: 12/20... Training Step: 5355... Training loss: 1.0792... 0.1198 sec/batch\n", "Epoch: 12/20... Training Step: 5356... Training loss: 1.1655... 0.1199 sec/batch\n", "Epoch: 12/20... Training Step: 5357... Training loss: 1.0726... 0.1180 sec/batch\n", "Epoch: 12/20... Training Step: 5358... Training loss: 1.0632... 0.1172 sec/batch\n", "Epoch: 12/20... Training Step: 5359... Training loss: 1.0885... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5360... Training loss: 0.9936... 0.1213 sec/batch\n", "Epoch: 12/20... Training Step: 5361... Training loss: 1.2502... 0.1134 sec/batch\n", "Epoch: 12/20... Training Step: 5362... Training loss: 0.9723... 0.1120 sec/batch\n", "Epoch: 12/20... Training Step: 5363... Training loss: 1.0517... 0.1185 sec/batch\n", "Epoch: 12/20... Training Step: 5364... Training loss: 0.9542... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5365... Training loss: 1.1759... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5366... 
Training loss: 1.0715... 0.1171 sec/batch\n", "Epoch: 12/20... Training Step: 5367... Training loss: 1.0393... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5368... Training loss: 1.1450... 0.1211 sec/batch\n", "Epoch: 12/20... Training Step: 5369... Training loss: 1.1453... 0.1195 sec/batch\n", "Epoch: 12/20... Training Step: 5370... Training loss: 1.0778... 0.1169 sec/batch\n", "Epoch: 12/20... Training Step: 5371... Training loss: 1.1707... 0.1198 sec/batch\n", "Epoch: 12/20... Training Step: 5372... Training loss: 1.1699... 0.1183 sec/batch\n", "Epoch: 12/20... Training Step: 5373... Training loss: 1.2210... 0.1186 sec/batch\n", "Epoch: 12/20... Training Step: 5374... Training loss: 1.1800... 0.1149 sec/batch\n", "Epoch: 12/20... Training Step: 5375... Training loss: 1.2940... 0.1171 sec/batch\n", "Epoch: 12/20... Training Step: 5376... Training loss: 1.2030... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5377... Training loss: 1.2182... 0.1155 sec/batch\n", "Epoch: 12/20... Training Step: 5378... Training loss: 1.2161... 0.1157 sec/batch\n", "Epoch: 12/20... Training Step: 5379... Training loss: 1.1148... 0.1157 sec/batch\n", "Epoch: 12/20... Training Step: 5380... Training loss: 1.1100... 0.1149 sec/batch\n", "Epoch: 12/20... Training Step: 5381... Training loss: 1.1688... 0.1176 sec/batch\n", "Epoch: 12/20... Training Step: 5382... Training loss: 1.4487... 0.1203 sec/batch\n", "Epoch: 12/20... Training Step: 5383... Training loss: 1.1221... 0.1267 sec/batch\n", "Epoch: 12/20... Training Step: 5384... Training loss: 1.0969... 0.1270 sec/batch\n", "Epoch: 12/20... Training Step: 5385... Training loss: 1.1091... 0.1278 sec/batch\n", "Epoch: 12/20... Training Step: 5386... Training loss: 1.1765... 0.1227 sec/batch\n", "Epoch: 12/20... Training Step: 5387... Training loss: 1.2145... 0.1260 sec/batch\n", "Epoch: 12/20... Training Step: 5388... Training loss: 1.0692... 0.1333 sec/batch\n", "Epoch: 12/20... Training Step: 5389... 
Training loss: 1.0011... 0.1339 sec/batch\n", "Epoch: 12/20... Training Step: 5390... Training loss: 1.0422... 0.1250 sec/batch\n", "Epoch: 12/20... Training Step: 5391... Training loss: 1.0962... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5392... Training loss: 1.1401... 0.1159 sec/batch\n", "Epoch: 12/20... Training Step: 5393... Training loss: 1.1092... 0.1189 sec/batch\n", "Epoch: 12/20... Training Step: 5394... Training loss: 1.2086... 0.1156 sec/batch\n", "Epoch: 12/20... Training Step: 5395... Training loss: 1.0422... 0.1190 sec/batch\n", "Epoch: 12/20... Training Step: 5396... Training loss: 1.0748... 0.1189 sec/batch\n", "Epoch: 12/20... Training Step: 5397... Training loss: 1.0397... 0.1167 sec/batch\n", "Epoch: 12/20... Training Step: 5398... Training loss: 1.2013... 0.1192 sec/batch\n", "Epoch: 12/20... Training Step: 5399... Training loss: 1.1654... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5400... Training loss: 1.3651... 0.1143 sec/batch\n", "Epoch: 12/20... Training Step: 5401... Training loss: 1.0557... 0.1216 sec/batch\n", "Epoch: 12/20... Training Step: 5402... Training loss: 1.1651... 0.1202 sec/batch\n", "Epoch: 12/20... Training Step: 5403... Training loss: 1.0372... 0.1188 sec/batch\n", "Epoch: 12/20... Training Step: 5404... Training loss: 1.0920... 0.1193 sec/batch\n", "Epoch: 12/20... Training Step: 5405... Training loss: 1.1163... 0.1180 sec/batch\n", "Epoch: 12/20... Training Step: 5406... Training loss: 1.0369... 0.1145 sec/batch\n", "Epoch: 12/20... Training Step: 5407... Training loss: 0.9360... 0.1166 sec/batch\n", "Epoch: 12/20... Training Step: 5408... Training loss: 1.2003... 0.1177 sec/batch\n", "Epoch: 12/20... Training Step: 5409... Training loss: 1.0891... 0.1165 sec/batch\n", "Epoch: 12/20... Training Step: 5410... Training loss: 1.1936... 0.1157 sec/batch\n", "Epoch: 12/20... Training Step: 5411... Training loss: 1.1581... 0.1185 sec/batch\n", "Epoch: 12/20... Training Step: 5412... 
Training loss: 1.3471... 0.1139 sec/batch\n", "Epoch: 12/20... Training Step: 5413... Training loss: 1.2748... 0.1130 sec/batch\n", "Epoch: 12/20... Training Step: 5414... Training loss: 1.1956... 0.1165 sec/batch\n", "Epoch: 12/20... Training Step: 5415... Training loss: 1.0855... 0.1184 sec/batch\n", "Epoch: 12/20... Training Step: 5416... Training loss: 0.9911... 0.1170 sec/batch\n", "Epoch: 12/20... Training Step: 5417... Training loss: 1.0482... 0.1201 sec/batch\n", "Epoch: 12/20... Training Step: 5418... Training loss: 1.0498... 0.1200 sec/batch\n", "Epoch: 12/20... Training Step: 5419... Training loss: 0.8816... 0.1194 sec/batch\n", "Epoch: 12/20... Training Step: 5420... Training loss: 0.9670... 0.1163 sec/batch\n", "Epoch: 12/20... Training Step: 5421... Training loss: 0.9866... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5422... Training loss: 1.0369... 0.1207 sec/batch\n", "Epoch: 12/20... Training Step: 5423... Training loss: 0.9199... 0.1179 sec/batch\n", "Epoch: 12/20... Training Step: 5424... Training loss: 1.0007... 0.1184 sec/batch\n", "Epoch: 12/20... Training Step: 5425... Training loss: 1.0885... 0.1226 sec/batch\n", "Epoch: 12/20... Training Step: 5426... Training loss: 1.2334... 0.1237 sec/batch\n", "Epoch: 12/20... Training Step: 5427... Training loss: 1.0422... 0.1159 sec/batch\n", "Epoch: 12/20... Training Step: 5428... Training loss: 1.0035... 0.1184 sec/batch\n", "Epoch: 12/20... Training Step: 5429... Training loss: 0.9049... 0.1168 sec/batch\n", "Epoch: 12/20... Training Step: 5430... Training loss: 0.9246... 0.1154 sec/batch\n", "Epoch: 12/20... Training Step: 5431... Training loss: 1.0600... 0.1171 sec/batch\n", "Epoch: 12/20... Training Step: 5432... Training loss: 1.0225... 0.1209 sec/batch\n", "Epoch: 12/20... Training Step: 5433... Training loss: 1.2918... 0.1299 sec/batch\n", "Epoch: 12/20... Training Step: 5434... Training loss: 1.1308... 0.1208 sec/batch\n", "Epoch: 12/20... Training Step: 5435... 
Training loss: 1.2680... 0.1177 sec/batch\n", "Epoch: 12/20... Training Step: 5436... Training loss: 1.1368... 0.1207 sec/batch\n", "Epoch: 12/20... Training Step: 5437... Training loss: 1.0148... 0.1134 sec/batch\n", "Epoch: 12/20... Training Step: 5438... Training loss: 1.0274... 0.1127 sec/batch\n", "Epoch: 12/20... Training Step: 5439... Training loss: 1.1550... 0.1174 sec/batch\n", "Epoch: 12/20... Training Step: 5440... Training loss: 1.1138... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5441... Training loss: 1.0068... 0.1189 sec/batch\n", "Epoch: 12/20... Training Step: 5442... Training loss: 0.9072... 0.1137 sec/batch\n", "Epoch: 12/20... Training Step: 5443... Training loss: 1.1344... 0.1262 sec/batch\n", "Epoch: 12/20... Training Step: 5444... Training loss: 1.1066... 0.1259 sec/batch\n", "Epoch: 12/20... Training Step: 5445... Training loss: 0.9642... 0.1186 sec/batch\n", "Epoch: 12/20... Training Step: 5446... Training loss: 1.1181... 0.1170 sec/batch\n", "Epoch: 12/20... Training Step: 5447... Training loss: 0.9039... 0.1162 sec/batch\n", "Epoch: 12/20... Training Step: 5448... Training loss: 1.1667... 0.1219 sec/batch\n", "Epoch: 12/20... Training Step: 5449... Training loss: 1.0114... 0.1160 sec/batch\n", "Epoch: 12/20... Training Step: 5450... Training loss: 0.9648... 0.1314 sec/batch\n", "Epoch: 12/20... Training Step: 5451... Training loss: 0.8987... 0.1295 sec/batch\n", "Epoch: 12/20... Training Step: 5452... Training loss: 1.2681... 0.1242 sec/batch\n", "Epoch: 12/20... Training Step: 5453... Training loss: 1.0488... 0.1213 sec/batch\n", "Epoch: 12/20... Training Step: 5454... Training loss: 1.0638... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5455... Training loss: 1.0791... 0.1191 sec/batch\n", "Epoch: 12/20... Training Step: 5456... Training loss: 1.0221... 0.1201 sec/batch\n", "Epoch: 12/20... Training Step: 5457... Training loss: 0.9847... 0.1192 sec/batch\n", "Epoch: 12/20... Training Step: 5458... 
Training loss: 0.8453... 0.1258 sec/batch\n", "Epoch: 12/20... Training Step: 5459... Training loss: 1.1506... 0.1172 sec/batch\n", "Epoch: 12/20... Training Step: 5460... Training loss: 0.9920... 0.1149 sec/batch\n", "Epoch: 12/20... Training Step: 5461... Training loss: 0.9764... 0.1165 sec/batch\n", "Epoch: 12/20... Training Step: 5462... Training loss: 1.1520... 0.1183 sec/batch\n", "Epoch: 12/20... Training Step: 5463... Training loss: 1.1799... 0.1160 sec/batch\n", "Epoch: 12/20... Training Step: 5464... Training loss: 0.8891... 0.1185 sec/batch\n", "Epoch: 12/20... Training Step: 5465... Training loss: 1.1656... 0.1180 sec/batch\n", "Epoch: 12/20... Training Step: 5466... Training loss: 1.1706... 0.1170 sec/batch\n", "Epoch: 12/20... Training Step: 5467... Training loss: 0.9097... 0.1201 sec/batch\n", "Epoch: 12/20... Training Step: 5468... Training loss: 0.9815... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5469... Training loss: 1.0729... 0.1207 sec/batch\n", "Epoch: 12/20... Training Step: 5470... Training loss: 1.1732... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5471... Training loss: 1.0378... 0.1176 sec/batch\n", "Epoch: 12/20... Training Step: 5472... Training loss: 1.2122... 0.1182 sec/batch\n", "Epoch: 12/20... Training Step: 5473... Training loss: 1.1046... 0.1163 sec/batch\n", "Epoch: 12/20... Training Step: 5474... Training loss: 1.0272... 0.1142 sec/batch\n", "Epoch: 12/20... Training Step: 5475... Training loss: 0.9494... 0.1152 sec/batch\n", "Epoch: 12/20... Training Step: 5476... Training loss: 1.0452... 0.1183 sec/batch\n", "Epoch: 12/20... Training Step: 5477... Training loss: 1.0181... 0.1211 sec/batch\n", "Epoch: 12/20... Training Step: 5478... Training loss: 1.1632... 0.1168 sec/batch\n", "Epoch: 12/20... Training Step: 5479... Training loss: 1.0928... 0.1216 sec/batch\n", "Epoch: 12/20... Training Step: 5480... Training loss: 1.1298... 0.1174 sec/batch\n", "Epoch: 12/20... Training Step: 5481... 
Training loss: 1.1823... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5482... Training loss: 1.1460... 0.1226 sec/batch\n", "Epoch: 12/20... Training Step: 5483... Training loss: 1.1489... 0.1173 sec/batch\n", "Epoch: 12/20... Training Step: 5484... Training loss: 1.0619... 0.1163 sec/batch\n", "Epoch: 12/20... Training Step: 5485... Training loss: 1.0710... 0.1155 sec/batch\n", "Epoch: 12/20... Training Step: 5486... Training loss: 1.0093... 0.1197 sec/batch\n", "Epoch: 12/20... Training Step: 5487... Training loss: 1.0150... 0.1200 sec/batch\n", "Epoch: 12/20... Training Step: 5488... Training loss: 1.1927... 0.1162 sec/batch\n", "Epoch: 12/20... Training Step: 5489... Training loss: 0.9960... 0.1151 sec/batch\n", "Epoch: 12/20... Training Step: 5490... Training loss: 1.0273... 0.1155 sec/batch\n", "Epoch: 12/20... Training Step: 5491... Training loss: 1.1085... 0.1162 sec/batch\n", "Epoch: 12/20... Training Step: 5492... Training loss: 0.9275... 0.1149 sec/batch\n", "Epoch: 12/20... Training Step: 5493... Training loss: 0.9343... 0.1198 sec/batch\n", "Epoch: 12/20... Training Step: 5494... Training loss: 1.0043... 0.1176 sec/batch\n", "Epoch: 12/20... Training Step: 5495... Training loss: 0.9880... 0.1160 sec/batch\n", "Epoch: 12/20... Training Step: 5496... Training loss: 1.0750... 0.1166 sec/batch\n", "Epoch: 12/20... Training Step: 5497... Training loss: 1.0498... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5498... Training loss: 1.0232... 0.1191 sec/batch\n", "Epoch: 12/20... Training Step: 5499... Training loss: 1.0628... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5500... Training loss: 1.2021... 0.1171 sec/batch\n", "Epoch: 12/20... Training Step: 5501... Training loss: 0.9822... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5502... Training loss: 1.0198... 0.1210 sec/batch\n", "Epoch: 12/20... Training Step: 5503... Training loss: 1.1256... 0.1141 sec/batch\n", "Epoch: 12/20... Training Step: 5504... 
Training loss: 1.1327... 0.1188 sec/batch\n", "Epoch: 12/20... Training Step: 5505... Training loss: 0.8461... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5506... Training loss: 0.9670... 0.1149 sec/batch\n", "Epoch: 12/20... Training Step: 5507... Training loss: 1.0041... 0.1168 sec/batch\n", "Epoch: 12/20... Training Step: 5508... Training loss: 1.2398... 0.1209 sec/batch\n", "Epoch: 12/20... Training Step: 5509... Training loss: 1.2107... 0.1271 sec/batch\n", "Epoch: 12/20... Training Step: 5510... Training loss: 1.1720... 0.1331 sec/batch\n", "Epoch: 12/20... Training Step: 5511... Training loss: 1.1550... 0.1289 sec/batch\n", "Epoch: 12/20... Training Step: 5512... Training loss: 1.3129... 0.1255 sec/batch\n", "Epoch: 12/20... Training Step: 5513... Training loss: 1.0926... 0.1231 sec/batch\n", "Epoch: 12/20... Training Step: 5514... Training loss: 0.9667... 0.1119 sec/batch\n", "Epoch: 12/20... Training Step: 5515... Training loss: 1.0049... 0.1130 sec/batch\n", "Epoch: 12/20... Training Step: 5516... Training loss: 1.1309... 0.1175 sec/batch\n", "Epoch: 12/20... Training Step: 5517... Training loss: 1.2509... 0.1183 sec/batch\n", "Epoch: 12/20... Training Step: 5518... Training loss: 1.0779... 0.1191 sec/batch\n", "Epoch: 12/20... Training Step: 5519... Training loss: 1.1168... 0.1200 sec/batch\n", "Epoch: 12/20... Training Step: 5520... Training loss: 1.3585... 0.1222 sec/batch\n", "Epoch: 12/20... Training Step: 5521... Training loss: 1.0795... 0.1190 sec/batch\n", "Epoch: 12/20... Training Step: 5522... Training loss: 0.8915... 0.1202 sec/batch\n", "Epoch: 12/20... Training Step: 5523... Training loss: 1.0037... 0.1173 sec/batch\n", "Epoch: 12/20... Training Step: 5524... Training loss: 0.9645... 0.1189 sec/batch\n", "Epoch: 12/20... Training Step: 5525... Training loss: 1.1401... 0.1173 sec/batch\n", "Epoch: 12/20... Training Step: 5526... Training loss: 1.1361... 0.1145 sec/batch\n", "Epoch: 12/20... Training Step: 5527... 
Training loss: 1.1051... 0.1235 sec/batch\n", "Epoch: 12/20... Training Step: 5528... Training loss: 1.2129... 0.1188 sec/batch\n", "Epoch: 12/20... Training Step: 5529... Training loss: 1.0280... 0.1157 sec/batch\n", "Epoch: 12/20... Training Step: 5530... Training loss: 1.0719... 0.1200 sec/batch\n", "Epoch: 12/20... Training Step: 5531... Training loss: 1.1050... 0.1163 sec/batch\n", "Epoch: 12/20... Training Step: 5532... Training loss: 1.0545... 0.1191 sec/batch\n", "Epoch: 12/20... Training Step: 5533... Training loss: 1.0259... 0.1194 sec/batch\n", "Epoch: 12/20... Training Step: 5534... Training loss: 1.0757... 0.1168 sec/batch\n", "Epoch: 12/20... Training Step: 5535... Training loss: 1.1904... 0.1178 sec/batch\n", "Epoch: 12/20... Training Step: 5536... Training loss: 1.0580... 0.1241 sec/batch\n", "Epoch: 12/20... Training Step: 5537... Training loss: 1.2774... 0.1145 sec/batch\n", "Epoch: 12/20... Training Step: 5538... Training loss: 1.1851... 0.1144 sec/batch\n", "Epoch: 12/20... Training Step: 5539... Training loss: 1.0188... 0.1173 sec/batch\n", "Epoch: 12/20... Training Step: 5540... Training loss: 1.0965... 0.1208 sec/batch\n", "Epoch: 12/20... Training Step: 5541... Training loss: 0.9629... 0.1191 sec/batch\n", "Epoch: 12/20... Training Step: 5542... Training loss: 1.1808... 0.1142 sec/batch\n", "Epoch: 12/20... Training Step: 5543... Training loss: 1.2846... 0.1243 sec/batch\n", "Epoch: 12/20... Training Step: 5544... Training loss: 1.3076... 0.1185 sec/batch\n", "Epoch: 12/20... Training Step: 5545... Training loss: 0.9763... 0.1231 sec/batch\n", "Epoch: 12/20... Training Step: 5546... Training loss: 1.0871... 0.1154 sec/batch\n", "Epoch: 12/20... Training Step: 5547... Training loss: 1.1039... 0.1202 sec/batch\n", "Epoch: 12/20... Training Step: 5548... Training loss: 1.0214... 0.1185 sec/batch\n", "Epoch: 12/20... Training Step: 5549... Training loss: 1.0235... 0.1157 sec/batch\n", "Epoch: 12/20... Training Step: 5550... 
Training loss: 1.0577... 0.1181 sec/batch\n", "Epoch: 12/20... Training Step: 5551... Training loss: 1.1562... 0.1196 sec/batch\n", "Epoch: 12/20... Training Step: 5552... Training loss: 0.9709... 0.1196 sec/batch\n", "Epoch: 12/20... Training Step: 5553... Training loss: 1.0205... 0.1190 sec/batch\n", "Epoch: 12/20... Training Step: 5554... Training loss: 1.0785... 0.1208 sec/batch\n", "Epoch: 12/20... Training Step: 5555... Training loss: 0.9845... 0.1221 sec/batch\n", "Epoch: 12/20... Training Step: 5556... Training loss: 1.1813... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5557... Training loss: 1.0523... 0.1171 sec/batch\n", "Epoch: 12/20... Training Step: 5558... Training loss: 1.1619... 0.1211 sec/batch\n", "Epoch: 12/20... Training Step: 5559... Training loss: 1.1088... 0.1165 sec/batch\n", "Epoch: 12/20... Training Step: 5560... Training loss: 0.9128... 0.1230 sec/batch\n", "Epoch: 12/20... Training Step: 5561... Training loss: 1.0081... 0.1217 sec/batch\n", "Epoch: 12/20... Training Step: 5562... Training loss: 0.9231... 0.1285 sec/batch\n", "Epoch: 12/20... Training Step: 5563... Training loss: 0.9855... 0.1194 sec/batch\n", "Epoch: 12/20... Training Step: 5564... Training loss: 1.0339... 0.1187 sec/batch\n", "Epoch: 12/20... Training Step: 5565... Training loss: 1.0894... 0.1292 sec/batch\n", "Epoch: 12/20... Training Step: 5566... Training loss: 0.9788... 0.1190 sec/batch\n", "Epoch: 12/20... Training Step: 5567... Training loss: 1.1114... 0.1258 sec/batch\n", "Epoch: 12/20... Training Step: 5568... Training loss: 1.0465... 0.1245 sec/batch\n", "Epoch: 13/20... Training Step: 5569... Training loss: 1.3773... 0.1265 sec/batch\n", "Epoch: 13/20... Training Step: 5570... Training loss: 1.1320... 0.1238 sec/batch\n", "Epoch: 13/20... Training Step: 5571... Training loss: 1.0771... 0.1168 sec/batch\n", "Epoch: 13/20... Training Step: 5572... Training loss: 1.1517... 0.1285 sec/batch\n", "Epoch: 13/20... Training Step: 5573... 
Training loss: 1.1432... 0.1247 sec/batch\n", "Epoch: 13/20... Training Step: 5574... Training loss: 0.8844... 0.1227 sec/batch\n", "Epoch: 13/20... Training Step: 5575... Training loss: 1.1936... 0.1192 sec/batch\n", "Epoch: 13/20... Training Step: 5576... Training loss: 0.9261... 0.1207 sec/batch\n", "Epoch: 13/20... Training Step: 5577... Training loss: 0.9836... 0.1277 sec/batch\n", "Epoch: 13/20... Training Step: 5578... Training loss: 1.2503... 0.1231 sec/batch\n", "Epoch: 13/20... Training Step: 5579... Training loss: 1.0426... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 5580... Training loss: 0.9293... 0.1219 sec/batch\n", "Epoch: 13/20... Training Step: 5581... Training loss: 1.2648... 0.1271 sec/batch\n", "Epoch: 13/20... Training Step: 5582... Training loss: 0.8409... 0.1239 sec/batch\n", "Epoch: 13/20... Training Step: 5583... Training loss: 1.1342... 0.1228 sec/batch\n", "Epoch: 13/20... Training Step: 5584... Training loss: 1.1728... 0.1218 sec/batch\n", "Epoch: 13/20... Training Step: 5585... Training loss: 1.0151... 0.1204 sec/batch\n", "Epoch: 13/20... Training Step: 5586... Training loss: 0.9533... 0.1304 sec/batch\n", "Epoch: 13/20... Training Step: 5587... Training loss: 0.9729... 0.1254 sec/batch\n", "Epoch: 13/20... Training Step: 5588... Training loss: 1.0116... 0.1232 sec/batch\n", "Epoch: 13/20... Training Step: 5589... Training loss: 1.2089... 0.1370 sec/batch\n", "Epoch: 13/20... Training Step: 5590... Training loss: 1.0977... 0.1292 sec/batch\n", "Epoch: 13/20... Training Step: 5591... Training loss: 1.0918... 0.1295 sec/batch\n", "Epoch: 13/20... Training Step: 5592... Training loss: 1.1186... 0.1288 sec/batch\n", "Epoch: 13/20... Training Step: 5593... Training loss: 1.0331... 0.1244 sec/batch\n", "Epoch: 13/20... Training Step: 5594... Training loss: 1.0919... 0.1181 sec/batch\n", "Epoch: 13/20... Training Step: 5595... Training loss: 1.1071... 0.1267 sec/batch\n", "Epoch: 13/20... Training Step: 5596... 
Training loss: 1.0819... 0.1231 sec/batch\n", "Epoch: 13/20... Training Step: 5597... Training loss: 1.1049... 0.1213 sec/batch\n", "Epoch: 13/20... Training Step: 5598... Training loss: 0.9935... 0.1226 sec/batch\n", "Epoch: 13/20... Training Step: 5599... Training loss: 0.9303... 0.1214 sec/batch\n", "Epoch: 13/20... Training Step: 5600... Training loss: 1.0773... 0.1182 sec/batch\n", "Epoch: 13/20... Training Step: 5601... Training loss: 0.9290... 0.1183 sec/batch\n", "Epoch: 13/20... Training Step: 5602... Training loss: 1.0208... 0.1144 sec/batch\n", "Epoch: 13/20... Training Step: 5603... Training loss: 0.9689... 0.1200 sec/batch\n", "Epoch: 13/20... Training Step: 5604... Training loss: 0.9886... 0.1176 sec/batch\n", "Epoch: 13/20... Training Step: 5605... Training loss: 1.1212... 0.1155 sec/batch\n", "Epoch: 13/20... Training Step: 5606... Training loss: 1.0071... 0.1249 sec/batch\n", "Epoch: 13/20... Training Step: 5607... Training loss: 0.9879... 0.1212 sec/batch\n", "Epoch: 13/20... Training Step: 5608... Training loss: 1.3244... 0.1155 sec/batch\n", "Epoch: 13/20... Training Step: 5609... Training loss: 1.0181... 0.1132 sec/batch\n", "Epoch: 13/20... Training Step: 5610... Training loss: 1.0072... 0.1194 sec/batch\n", "Epoch: 13/20... Training Step: 5611... Training loss: 1.2114... 0.1191 sec/batch\n", "Epoch: 13/20... Training Step: 5612... Training loss: 0.8667... 0.1205 sec/batch\n", "Epoch: 13/20... Training Step: 5613... Training loss: 1.0466... 0.1136 sec/batch\n", "Epoch: 13/20... Training Step: 5614... Training loss: 0.9367... 0.1102 sec/batch\n", "Epoch: 13/20... Training Step: 5615... Training loss: 1.1105... 0.1191 sec/batch\n", "Epoch: 13/20... Training Step: 5616... Training loss: 1.0418... 0.1160 sec/batch\n", "Epoch: 13/20... Training Step: 5617... Training loss: 1.0921... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 5618... Training loss: 1.1215... 0.1181 sec/batch\n", "Epoch: 13/20... Training Step: 5619... 
Training loss: 1.0523... 0.1162 sec/batch\n", "Epoch: 13/20... Training Step: 5620... Training loss: 0.9645... 0.1130 sec/batch\n", "Epoch: 13/20... Training Step: 5621... Training loss: 1.0764... 0.1166 sec/batch\n", "Epoch: 13/20... Training Step: 5622... Training loss: 0.9832... 0.1268 sec/batch\n", "Epoch: 13/20... Training Step: 5623... Training loss: 1.0096... 0.1163 sec/batch\n", "Epoch: 13/20... Training Step: 5624... Training loss: 0.9549... 0.1162 sec/batch\n", "Epoch: 13/20... Training Step: 5625... Training loss: 1.1362... 0.1195 sec/batch\n", "Epoch: 13/20... Training Step: 5626... Training loss: 1.0608... 0.1197 sec/batch\n", "Epoch: 13/20... Training Step: 5627... Training loss: 0.8064... 0.1144 sec/batch\n", "Epoch: 13/20... Training Step: 5628... Training loss: 0.9704... 0.1188 sec/batch\n", "Epoch: 13/20... Training Step: 5629... Training loss: 0.8764... 0.1166 sec/batch\n", "Epoch: 13/20... Training Step: 5630... Training loss: 1.1256... 0.1198 sec/batch\n", "Epoch: 13/20... Training Step: 5631... Training loss: 1.0087... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5632... Training loss: 1.1505... 0.1158 sec/batch\n", "Epoch: 13/20... Training Step: 5633... Training loss: 0.9525... 0.1267 sec/batch\n", "Epoch: 13/20... Training Step: 5634... Training loss: 1.0417... 0.1164 sec/batch\n", "Epoch: 13/20... Training Step: 5635... Training loss: 1.0441... 0.1266 sec/batch\n", "Epoch: 13/20... Training Step: 5636... Training loss: 1.1168... 0.1180 sec/batch\n", "Epoch: 13/20... Training Step: 5637... Training loss: 1.0455... 0.1172 sec/batch\n", "Epoch: 13/20... Training Step: 5638... Training loss: 1.0731... 0.1129 sec/batch\n", "Epoch: 13/20... Training Step: 5639... Training loss: 1.2368... 0.1277 sec/batch\n", "Epoch: 13/20... Training Step: 5640... Training loss: 0.8461... 0.1203 sec/batch\n", "Epoch: 13/20... Training Step: 5641... Training loss: 0.9797... 0.1198 sec/batch\n", "Epoch: 13/20... Training Step: 5642... 
Training loss: 0.9000... 0.1147 sec/batch\n", "Epoch: 13/20... Training Step: 5643... Training loss: 1.1810... 0.1136 sec/batch\n", "Epoch: 13/20... Training Step: 5644... Training loss: 0.9090... 0.1193 sec/batch\n", "Epoch: 13/20... Training Step: 5645... Training loss: 0.9550... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 5646... Training loss: 0.9823... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 5647... Training loss: 1.0535... 0.1124 sec/batch\n", "Epoch: 13/20... Training Step: 5648... Training loss: 0.9529... 0.1155 sec/batch\n", "Epoch: 13/20... Training Step: 5649... Training loss: 1.1554... 0.1158 sec/batch\n", "Epoch: 13/20... Training Step: 5650... Training loss: 1.1047... 0.1120 sec/batch\n", "Epoch: 13/20... Training Step: 5651... Training loss: 0.9081... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 5652... Training loss: 1.1565... 0.1141 sec/batch\n", "Epoch: 13/20... Training Step: 5653... Training loss: 1.1734... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 5654... Training loss: 1.3048... 0.1134 sec/batch\n", "Epoch: 13/20... Training Step: 5655... Training loss: 0.9735... 0.1155 sec/batch\n", "Epoch: 13/20... Training Step: 5656... Training loss: 1.1335... 0.1118 sec/batch\n", "Epoch: 13/20... Training Step: 5657... Training loss: 1.1569... 0.1139 sec/batch\n", "Epoch: 13/20... Training Step: 5658... Training loss: 1.0987... 0.1210 sec/batch\n", "Epoch: 13/20... Training Step: 5659... Training loss: 1.2668... 0.1206 sec/batch\n", "Epoch: 13/20... Training Step: 5660... Training loss: 1.1898... 0.1174 sec/batch\n", "Epoch: 13/20... Training Step: 5661... Training loss: 1.0015... 0.1205 sec/batch\n", "Epoch: 13/20... Training Step: 5662... Training loss: 1.1512... 0.1162 sec/batch\n", "Epoch: 13/20... Training Step: 5663... Training loss: 0.9458... 0.1192 sec/batch\n", "Epoch: 13/20... Training Step: 5664... Training loss: 0.9985... 0.1198 sec/batch\n", "Epoch: 13/20... Training Step: 5665... 
Training loss: 1.2202... 0.1157 sec/batch\n", "Epoch: 13/20... Training Step: 5666... Training loss: 1.0659... 0.1163 sec/batch\n", "Epoch: 13/20... Training Step: 5667... Training loss: 1.1439... 0.1206 sec/batch\n", "Epoch: 13/20... Training Step: 5668... Training loss: 1.1210... 0.1144 sec/batch\n", "Epoch: 13/20... Training Step: 5669... Training loss: 1.1505... 0.1201 sec/batch\n", "Epoch: 13/20... Training Step: 5670... Training loss: 1.1517... 0.1150 sec/batch\n", "Epoch: 13/20... Training Step: 5671... Training loss: 1.2223... 0.1149 sec/batch\n", "Epoch: 13/20... Training Step: 5672... Training loss: 1.0279... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 5673... Training loss: 1.2505... 0.1133 sec/batch\n", "Epoch: 13/20... Training Step: 5674... Training loss: 1.1406... 0.1178 sec/batch\n", "Epoch: 13/20... Training Step: 5675... Training loss: 1.0036... 0.1202 sec/batch\n", "Epoch: 13/20... Training Step: 5676... Training loss: 1.1441... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5677... Training loss: 1.1081... 0.1141 sec/batch\n", "Epoch: 13/20... Training Step: 5678... Training loss: 0.9733... 0.1155 sec/batch\n", "Epoch: 13/20... Training Step: 5679... Training loss: 1.0260... 0.1136 sec/batch\n", "Epoch: 13/20... Training Step: 5680... Training loss: 0.9091... 0.1195 sec/batch\n", "Epoch: 13/20... Training Step: 5681... Training loss: 1.1194... 0.1247 sec/batch\n", "Epoch: 13/20... Training Step: 5682... Training loss: 1.2502... 0.1164 sec/batch\n", "Epoch: 13/20... Training Step: 5683... Training loss: 1.0613... 0.1168 sec/batch\n", "Epoch: 13/20... Training Step: 5684... Training loss: 0.9593... 0.1134 sec/batch\n", "Epoch: 13/20... Training Step: 5685... Training loss: 1.1959... 0.1181 sec/batch\n", "Epoch: 13/20... Training Step: 5686... Training loss: 1.0440... 0.1129 sec/batch\n", "Epoch: 13/20... Training Step: 5687... Training loss: 1.1355... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5688... 
Training loss: 0.8495... 0.1130 sec/batch\n", "Epoch: 13/20... Training Step: 5689... Training loss: 1.1435... 0.1158 sec/batch\n", "Epoch: 13/20... Training Step: 5690... Training loss: 1.1619... 0.1176 sec/batch\n", "Epoch: 13/20... Training Step: 5691... Training loss: 1.0946... 0.1150 sec/batch\n", "Epoch: 13/20... Training Step: 5692... Training loss: 1.1469... 0.1169 sec/batch\n", "Epoch: 13/20... Training Step: 5693... Training loss: 1.1945... 0.1211 sec/batch\n", "Epoch: 13/20... Training Step: 5694... Training loss: 1.0238... 0.1133 sec/batch\n", "Epoch: 13/20... Training Step: 5695... Training loss: 1.0639... 0.1151 sec/batch\n", "Epoch: 13/20... Training Step: 5696... Training loss: 1.1610... 0.1163 sec/batch\n", "Epoch: 13/20... Training Step: 5697... Training loss: 1.0304... 0.1183 sec/batch\n", "Epoch: 13/20... Training Step: 5698... Training loss: 1.0398... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5699... Training loss: 1.2267... 0.1177 sec/batch\n", "Epoch: 13/20... Training Step: 5700... Training loss: 1.0870... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5701... Training loss: 1.0448... 0.1176 sec/batch\n", "Epoch: 13/20... Training Step: 5702... Training loss: 1.1577... 0.1129 sec/batch\n", "Epoch: 13/20... Training Step: 5703... Training loss: 1.0593... 0.1172 sec/batch\n", "Epoch: 13/20... Training Step: 5704... Training loss: 0.9506... 0.1146 sec/batch\n", "Epoch: 13/20... Training Step: 5705... Training loss: 0.9555... 0.1178 sec/batch\n", "Epoch: 13/20... Training Step: 5706... Training loss: 1.1189... 0.1183 sec/batch\n", "Epoch: 13/20... Training Step: 5707... Training loss: 0.9679... 0.1220 sec/batch\n", "Epoch: 13/20... Training Step: 5708... Training loss: 1.1256... 0.1164 sec/batch\n", "Epoch: 13/20... Training Step: 5709... Training loss: 0.8427... 0.1189 sec/batch\n", "Epoch: 13/20... Training Step: 5710... Training loss: 0.9894... 0.1172 sec/batch\n", "Epoch: 13/20... Training Step: 5711... 
Training loss: 0.9464... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 5712... Training loss: 1.0726... 0.1176 sec/batch\n", "Epoch: 13/20... Training Step: 5713... Training loss: 1.0870... 0.1170 sec/batch\n", "Epoch: 13/20... Training Step: 5714... Training loss: 1.0116... 0.1140 sec/batch\n", "Epoch: 13/20... Training Step: 5715... Training loss: 0.9717... 0.1217 sec/batch\n", "Epoch: 13/20... Training Step: 5716... Training loss: 1.0249... 0.1143 sec/batch\n", "Epoch: 13/20... Training Step: 5717... Training loss: 0.9965... 0.1189 sec/batch\n", "Epoch: 13/20... Training Step: 5718... Training loss: 1.1242... 0.1160 sec/batch\n", "Epoch: 13/20... Training Step: 5719... Training loss: 1.0635... 0.1199 sec/batch\n", "Epoch: 13/20... Training Step: 5720... Training loss: 1.0863... 0.1170 sec/batch\n", "Epoch: 13/20... Training Step: 5721... Training loss: 1.1673... 0.1201 sec/batch\n", "Epoch: 13/20... Training Step: 5722... Training loss: 1.1029... 0.1187 sec/batch\n", "Epoch: 13/20... Training Step: 5723... Training loss: 1.0336... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 5724... Training loss: 0.9700... 0.1197 sec/batch\n", "Epoch: 13/20... Training Step: 5725... Training loss: 1.0438... 0.1156 sec/batch\n", "Epoch: 13/20... Training Step: 5726... Training loss: 1.1453... 0.1190 sec/batch\n", "Epoch: 13/20... Training Step: 5727... Training loss: 0.9405... 0.1150 sec/batch\n", "Epoch: 13/20... Training Step: 5728... Training loss: 1.1404... 0.1177 sec/batch\n", "Epoch: 13/20... Training Step: 5729... Training loss: 1.1644... 0.1167 sec/batch\n", "Epoch: 13/20... Training Step: 5730... Training loss: 1.1370... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5731... Training loss: 1.2007... 0.1202 sec/batch\n", "Epoch: 13/20... Training Step: 5732... Training loss: 1.0676... 0.1177 sec/batch\n", "Epoch: 13/20... Training Step: 5733... Training loss: 1.0706... 0.1202 sec/batch\n", "Epoch: 13/20... Training Step: 5734... 
Training loss: 0.9332... 0.1156 sec/batch\n", "Epoch: 13/20... Training Step: 5735... Training loss: 0.9124... 0.1163 sec/batch\n", "Epoch: 13/20... Training Step: 5736... Training loss: 1.1066... 0.1193 sec/batch\n", "Epoch: 13/20... Training Step: 5737... Training loss: 0.9197... 0.1197 sec/batch\n", "Epoch: 13/20... Training Step: 5738... Training loss: 1.0114... 0.1187 sec/batch\n", "Epoch: 13/20... Training Step: 5739... Training loss: 1.1476... 0.1196 sec/batch\n", "Epoch: 13/20... Training Step: 5740... Training loss: 1.2345... 0.1228 sec/batch\n", "Epoch: 13/20... Training Step: 5741... Training loss: 0.9743... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 5742... Training loss: 1.0378... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 5743... Training loss: 1.3188... 0.1183 sec/batch\n", "Epoch: 13/20... Training Step: 5744... Training loss: 0.8592... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 5745... Training loss: 0.9050... 0.1166 sec/batch\n", "Epoch: 13/20... Training Step: 5746... Training loss: 1.1900... 0.1183 sec/batch\n", "Epoch: 13/20... Training Step: 5747... Training loss: 0.8705... 0.1174 sec/batch\n", "Epoch: 13/20... Training Step: 5748... Training loss: 1.0660... 0.1134 sec/batch\n", "Epoch: 13/20... Training Step: 5749... Training loss: 0.8465... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 5750... Training loss: 1.2064... 0.1181 sec/batch\n", "Epoch: 13/20... Training Step: 5751... Training loss: 1.1062... 0.1151 sec/batch\n", "Epoch: 13/20... Training Step: 5752... Training loss: 1.0066... 0.1178 sec/batch\n", "Epoch: 13/20... Training Step: 5753... Training loss: 1.2165... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 5754... Training loss: 1.1694... 0.1176 sec/batch\n", "Epoch: 13/20... Training Step: 5755... Training loss: 1.1626... 0.1209 sec/batch\n", "Epoch: 13/20... Training Step: 5756... Training loss: 1.0335... 0.1207 sec/batch\n", "Epoch: 13/20... Training Step: 5757... 
Training loss: 1.1198... 0.1136 sec/batch\n", "Epoch: 13/20... Training Step: 5758... Training loss: 1.0774... 0.1147 sec/batch\n", "Epoch: 13/20... Training Step: 5759... Training loss: 0.9965... 0.1141 sec/batch\n", "Epoch: 13/20... Training Step: 5760... Training loss: 1.0717... 0.1158 sec/batch\n", "Epoch: 13/20... Training Step: 5761... Training loss: 1.0366... 0.1210 sec/batch\n", "Epoch: 13/20... Training Step: 5762... Training loss: 1.1383... 0.1221 sec/batch\n", "Epoch: 13/20... Training Step: 5763... Training loss: 1.0045... 0.1217 sec/batch\n", "Epoch: 13/20... Training Step: 5764... Training loss: 1.0870... 0.1227 sec/batch\n", "Epoch: 13/20... Training Step: 5765... Training loss: 1.0689... 0.1222 sec/batch\n", "Epoch: 13/20... Training Step: 5766... Training loss: 1.1760... 0.1259 sec/batch\n", "Epoch: 13/20... Training Step: 5767... Training loss: 0.9432... 0.1260 sec/batch\n", "Epoch: 13/20... Training Step: 5768... Training loss: 1.1234... 0.1282 sec/batch\n", "Epoch: 13/20... Training Step: 5769... Training loss: 0.9797... 0.1267 sec/batch\n", "Epoch: 13/20... Training Step: 5770... Training loss: 1.0814... 0.1250 sec/batch\n", "Epoch: 13/20... Training Step: 5771... Training loss: 1.0289... 0.1191 sec/batch\n", "Epoch: 13/20... Training Step: 5772... Training loss: 1.1869... 0.1233 sec/batch\n", "Epoch: 13/20... Training Step: 5773... Training loss: 0.9786... 0.1218 sec/batch\n", "Epoch: 13/20... Training Step: 5774... Training loss: 1.0491... 0.1152 sec/batch\n", "Epoch: 13/20... Training Step: 5775... Training loss: 0.9800... 0.1134 sec/batch\n", "Epoch: 13/20... Training Step: 5776... Training loss: 1.1676... 0.1119 sec/batch\n", "Epoch: 13/20... Training Step: 5777... Training loss: 1.1212... 0.1163 sec/batch\n", "Epoch: 13/20... Training Step: 5778... Training loss: 0.9392... 0.1241 sec/batch\n", "Epoch: 13/20... Training Step: 5779... Training loss: 0.9613... 0.1224 sec/batch\n", "Epoch: 13/20... Training Step: 5780... 
Training loss: 1.1864... 0.1304 sec/batch\n", "Epoch: 13/20... Training Step: 5781... Training loss: 1.1274... 0.1262 sec/batch\n", "Epoch: 13/20... Training Step: 5782... Training loss: 1.0919... 0.1238 sec/batch\n", "Epoch: 13/20... Training Step: 5783... Training loss: 1.0729... 0.1322 sec/batch\n", "Epoch: 13/20... Training Step: 5784... Training loss: 1.0474... 0.1377 sec/batch\n", "Epoch: 13/20... Training Step: 5785... Training loss: 0.9907... 0.1387 sec/batch\n", "Epoch: 13/20... Training Step: 5786... Training loss: 1.1713... 0.1359 sec/batch\n", "Epoch: 13/20... Training Step: 5787... Training loss: 1.1332... 0.1315 sec/batch\n", "Epoch: 13/20... Training Step: 5788... Training loss: 1.0928... 0.1266 sec/batch\n", "Epoch: 13/20... Training Step: 5789... Training loss: 0.9800... 0.1223 sec/batch\n", "Epoch: 13/20... Training Step: 5790... Training loss: 1.4158... 0.1313 sec/batch\n", "Epoch: 13/20... Training Step: 5791... Training loss: 1.2106... 0.1277 sec/batch\n", "Epoch: 13/20... Training Step: 5792... Training loss: 1.1993... 0.1263 sec/batch\n", "Epoch: 13/20... Training Step: 5793... Training loss: 1.0229... 0.1237 sec/batch\n", "Epoch: 13/20... Training Step: 5794... Training loss: 1.1773... 0.1283 sec/batch\n", "Epoch: 13/20... Training Step: 5795... Training loss: 1.2700... 0.1223 sec/batch\n", "Epoch: 13/20... Training Step: 5796... Training loss: 0.9858... 0.1220 sec/batch\n", "Epoch: 13/20... Training Step: 5797... Training loss: 1.0560... 0.1491 sec/batch\n", "Epoch: 13/20... Training Step: 5798... Training loss: 1.0637... 0.1421 sec/batch\n", "Epoch: 13/20... Training Step: 5799... Training loss: 1.0386... 0.1328 sec/batch\n", "Epoch: 13/20... Training Step: 5800... Training loss: 1.0529... 0.1331 sec/batch\n", "Epoch: 13/20... Training Step: 5801... Training loss: 1.2854... 0.1236 sec/batch\n", "Epoch: 13/20... Training Step: 5802... Training loss: 1.0100... 0.1249 sec/batch\n", "Epoch: 13/20... Training Step: 5803... 
Training loss: 1.3172... 0.1214 sec/batch\n", "Epoch: 13/20... Training Step: 5804... Training loss: 0.9567... 0.1222 sec/batch\n", "Epoch: 13/20... Training Step: 5805... Training loss: 1.2188... 0.1214 sec/batch\n", "Epoch: 13/20... Training Step: 5806... Training loss: 0.9628... 0.1243 sec/batch\n", "Epoch: 13/20... Training Step: 5807... Training loss: 1.1559... 0.1254 sec/batch\n", "Epoch: 13/20... Training Step: 5808... Training loss: 1.1193... 0.1193 sec/batch\n", "Epoch: 13/20... Training Step: 5809... Training loss: 1.0281... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 5810... Training loss: 0.9938... 0.1169 sec/batch\n", "Epoch: 13/20... Training Step: 5811... Training loss: 1.1712... 0.1264 sec/batch\n", "Epoch: 13/20... Training Step: 5812... Training loss: 1.1241... 0.1273 sec/batch\n", "Epoch: 13/20... Training Step: 5813... Training loss: 1.0484... 0.1255 sec/batch\n", "Epoch: 13/20... Training Step: 5814... Training loss: 0.9546... 0.1241 sec/batch\n", "Epoch: 13/20... Training Step: 5815... Training loss: 1.0930... 0.1227 sec/batch\n", "Epoch: 13/20... Training Step: 5816... Training loss: 1.3008... 0.1227 sec/batch\n", "Epoch: 13/20... Training Step: 5817... Training loss: 1.1660... 0.1113 sec/batch\n", "Epoch: 13/20... Training Step: 5818... Training loss: 1.0542... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 5819... Training loss: 1.1609... 0.1150 sec/batch\n", "Epoch: 13/20... Training Step: 5820... Training loss: 0.9762... 0.1314 sec/batch\n", "Epoch: 13/20... Training Step: 5821... Training loss: 0.9722... 0.1342 sec/batch\n", "Epoch: 13/20... Training Step: 5822... Training loss: 1.0023... 0.1311 sec/batch\n", "Epoch: 13/20... Training Step: 5823... Training loss: 1.0188... 0.1335 sec/batch\n", "Epoch: 13/20... Training Step: 5824... Training loss: 1.1014... 0.1228 sec/batch\n", "Epoch: 13/20... Training Step: 5825... Training loss: 1.2401... 0.1255 sec/batch\n", "Epoch: 13/20... Training Step: 5826... 
Training loss: 1.0450... 0.1337 sec/batch\n", "Epoch: 13/20... Training Step: 5827... Training loss: 0.9192... 0.1320 sec/batch\n", "Epoch: 13/20... Training Step: 5828... Training loss: 1.0921... 0.1322 sec/batch\n", "Epoch: 13/20... Training Step: 5829... Training loss: 1.0379... 0.1334 sec/batch\n", "Epoch: 13/20... Training Step: 5830... Training loss: 1.0958... 0.1291 sec/batch\n", "Epoch: 13/20... Training Step: 5831... Training loss: 1.0439... 0.1264 sec/batch\n", "Epoch: 13/20... Training Step: 5832... Training loss: 1.0391... 0.1423 sec/batch\n", "Epoch: 13/20... Training Step: 5833... Training loss: 1.1474... 0.1314 sec/batch\n", "Epoch: 13/20... Training Step: 5834... Training loss: 1.0413... 0.1279 sec/batch\n", "Epoch: 13/20... Training Step: 5835... Training loss: 1.1615... 0.1270 sec/batch\n", "Epoch: 13/20... Training Step: 5836... Training loss: 1.1515... 0.1269 sec/batch\n", "Epoch: 13/20... Training Step: 5837... Training loss: 1.2108... 0.1251 sec/batch\n", "Epoch: 13/20... Training Step: 5838... Training loss: 1.1510... 0.1269 sec/batch\n", "Epoch: 13/20... Training Step: 5839... Training loss: 1.2229... 0.1211 sec/batch\n", "Epoch: 13/20... Training Step: 5840... Training loss: 1.2778... 0.1260 sec/batch\n", "Epoch: 13/20... Training Step: 5841... Training loss: 1.1301... 0.1306 sec/batch\n", "Epoch: 13/20... Training Step: 5842... Training loss: 1.1939... 0.1302 sec/batch\n", "Epoch: 13/20... Training Step: 5843... Training loss: 1.2127... 0.1162 sec/batch\n", "Epoch: 13/20... Training Step: 5844... Training loss: 1.0126... 0.1276 sec/batch\n", "Epoch: 13/20... Training Step: 5845... Training loss: 1.0920... 0.1253 sec/batch\n", "Epoch: 13/20... Training Step: 5846... Training loss: 1.3305... 0.1247 sec/batch\n", "Epoch: 13/20... Training Step: 5847... Training loss: 1.1012... 0.1188 sec/batch\n", "Epoch: 13/20... Training Step: 5848... Training loss: 1.0337... 0.1143 sec/batch\n", "Epoch: 13/20... Training Step: 5849... 
Training loss: 1.0578... 0.1271 sec/batch\n", "Epoch: 13/20... Training Step: 5850... Training loss: 1.0482... 0.1221 sec/batch\n", "Epoch: 13/20... Training Step: 5851... Training loss: 1.1506... 0.1202 sec/batch\n", "Epoch: 13/20... Training Step: 5852... Training loss: 1.1729... 0.1272 sec/batch\n", "Epoch: 13/20... Training Step: 5853... Training loss: 1.0237... 0.1306 sec/batch\n", "Epoch: 13/20... Training Step: 5854... Training loss: 1.1389... 0.1344 sec/batch\n", "Epoch: 13/20... Training Step: 5855... Training loss: 1.1731... 0.1285 sec/batch\n", "Epoch: 13/20... Training Step: 5856... Training loss: 1.1811... 0.1221 sec/batch\n", "Epoch: 13/20... Training Step: 5857... Training loss: 1.0998... 0.1229 sec/batch\n", "Epoch: 13/20... Training Step: 5858... Training loss: 1.1203... 0.1176 sec/batch\n", "Epoch: 13/20... Training Step: 5859... Training loss: 1.1398... 0.1301 sec/batch\n", "Epoch: 13/20... Training Step: 5860... Training loss: 1.0682... 0.1255 sec/batch\n", "Epoch: 13/20... Training Step: 5861... Training loss: 1.0534... 0.1209 sec/batch\n", "Epoch: 13/20... Training Step: 5862... Training loss: 1.1779... 0.1275 sec/batch\n", "Epoch: 13/20... Training Step: 5863... Training loss: 1.1096... 0.1307 sec/batch\n", "Epoch: 13/20... Training Step: 5864... Training loss: 1.2879... 0.1263 sec/batch\n", "Epoch: 13/20... Training Step: 5865... Training loss: 1.0792... 0.1272 sec/batch\n", "Epoch: 13/20... Training Step: 5866... Training loss: 1.0546... 0.1322 sec/batch\n", "Epoch: 13/20... Training Step: 5867... Training loss: 1.1716... 0.1247 sec/batch\n", "Epoch: 13/20... Training Step: 5868... Training loss: 1.0897... 0.1338 sec/batch\n", "Epoch: 13/20... Training Step: 5869... Training loss: 1.1111... 0.1253 sec/batch\n", "Epoch: 13/20... Training Step: 5870... Training loss: 1.0674... 0.1309 sec/batch\n", "Epoch: 13/20... Training Step: 5871... Training loss: 0.9127... 0.1295 sec/batch\n", "Epoch: 13/20... Training Step: 5872... 
Training loss: 1.3002... 0.1223 sec/batch\n", "Epoch: 13/20... Training Step: 5873... Training loss: 1.0138... 0.1321 sec/batch\n", "Epoch: 13/20... Training Step: 5874... Training loss: 1.2084... 0.1301 sec/batch\n", "Epoch: 13/20... Training Step: 5875... Training loss: 1.0819... 0.1282 sec/batch\n", "Epoch: 13/20... Training Step: 5876... Training loss: 1.2980... 0.1270 sec/batch\n", "Epoch: 13/20... Training Step: 5877... Training loss: 1.2670... 0.1294 sec/batch\n", "Epoch: 13/20... Training Step: 5878... Training loss: 1.1470... 0.1283 sec/batch\n", "Epoch: 13/20... Training Step: 5879... Training loss: 1.0598... 0.1198 sec/batch\n", "Epoch: 13/20... Training Step: 5880... Training loss: 1.0111... 0.1175 sec/batch\n", "Epoch: 13/20... Training Step: 5881... Training loss: 0.9267... 0.1134 sec/batch\n", "Epoch: 13/20... Training Step: 5882... Training loss: 0.9689... 0.1123 sec/batch\n", "Epoch: 13/20... Training Step: 5883... Training loss: 0.9120... 0.1188 sec/batch\n", "Epoch: 13/20... Training Step: 5884... Training loss: 0.9019... 0.1172 sec/batch\n", "Epoch: 13/20... Training Step: 5885... Training loss: 0.9019... 0.1169 sec/batch\n", "Epoch: 13/20... Training Step: 5886... Training loss: 1.0401... 0.1190 sec/batch\n", "Epoch: 13/20... Training Step: 5887... Training loss: 0.9876... 0.1155 sec/batch\n", "Epoch: 13/20... Training Step: 5888... Training loss: 1.0192... 0.1190 sec/batch\n", "Epoch: 13/20... Training Step: 5889... Training loss: 0.9558... 0.1157 sec/batch\n", "Epoch: 13/20... Training Step: 5890... Training loss: 1.1911... 0.1148 sec/batch\n", "Epoch: 13/20... Training Step: 5891... Training loss: 0.9931... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5892... Training loss: 0.9508... 0.1224 sec/batch\n", "Epoch: 13/20... Training Step: 5893... Training loss: 0.9365... 0.1277 sec/batch\n", "Epoch: 13/20... Training Step: 5894... Training loss: 0.8580... 0.1258 sec/batch\n", "Epoch: 13/20... Training Step: 5895... 
Training loss: 1.0147... 0.1209 sec/batch\n", "Epoch: 13/20... Training Step: 5896... Training loss: 0.9630... 0.1125 sec/batch\n", "Epoch: 13/20... Training Step: 5897... Training loss: 1.1806... 0.1192 sec/batch\n", "Epoch: 13/20... Training Step: 5898... Training loss: 0.9471... 0.1172 sec/batch\n", "Epoch: 13/20... Training Step: 5899... Training loss: 1.0372... 0.1161 sec/batch\n", "Epoch: 13/20... Training Step: 5900... Training loss: 1.1011... 0.1183 sec/batch\n", "Epoch: 13/20... Training Step: 5901... Training loss: 0.9619... 0.1195 sec/batch\n", "Epoch: 13/20... Training Step: 5902... Training loss: 0.9586... 0.1159 sec/batch\n", "Epoch: 13/20... Training Step: 5903... Training loss: 1.0569... 0.1157 sec/batch\n", "Epoch: 13/20... Training Step: 5904... Training loss: 1.0157... 0.1218 sec/batch\n", "Epoch: 13/20... Training Step: 5905... Training loss: 0.9543... 0.1187 sec/batch\n", "Epoch: 13/20... Training Step: 5906... Training loss: 0.9973... 0.1189 sec/batch\n", "Epoch: 13/20... Training Step: 5907... Training loss: 1.0254... 0.1147 sec/batch\n", "Epoch: 13/20... Training Step: 5908... Training loss: 0.9831... 0.1157 sec/batch\n", "Epoch: 13/20... Training Step: 5909... Training loss: 1.0192... 0.1204 sec/batch\n", "Epoch: 13/20... Training Step: 5910... Training loss: 1.1074... 0.1189 sec/batch\n", "Epoch: 13/20... Training Step: 5911... Training loss: 0.9342... 0.1136 sec/batch\n", "Epoch: 13/20... Training Step: 5912... Training loss: 1.0559... 0.1146 sec/batch\n", "Epoch: 13/20... Training Step: 5913... Training loss: 0.9799... 0.1161 sec/batch\n", "Epoch: 13/20... Training Step: 5914... Training loss: 1.0144... 0.1179 sec/batch\n", "Epoch: 13/20... Training Step: 5915... Training loss: 0.9269... 0.1177 sec/batch\n", "Epoch: 13/20... Training Step: 5916... Training loss: 1.2286... 0.1159 sec/batch\n", "Epoch: 13/20... Training Step: 5917... Training loss: 0.9672... 0.1175 sec/batch\n", "Epoch: 13/20... Training Step: 5918... 
Training loss: 1.0843... 0.1218 sec/batch\n", "Epoch: 13/20... Training Step: 5919... Training loss: 1.0767... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 5920... Training loss: 1.0132... 0.1202 sec/batch\n", "Epoch: 13/20... Training Step: 5921... Training loss: 0.8628... 0.1160 sec/batch\n", "Epoch: 13/20... Training Step: 5922... Training loss: 0.7347... 0.1124 sec/batch\n", "Epoch: 13/20... Training Step: 5923... Training loss: 1.0701... 0.1185 sec/batch\n", "Epoch: 13/20... Training Step: 5924... Training loss: 1.0710... 0.1182 sec/batch\n", "Epoch: 13/20... Training Step: 5925... Training loss: 0.9147... 0.1161 sec/batch\n", "Epoch: 13/20... Training Step: 5926... Training loss: 1.1501... 0.1167 sec/batch\n", "Epoch: 13/20... Training Step: 5927... Training loss: 1.1788... 0.1169 sec/batch\n", "Epoch: 13/20... Training Step: 5928... Training loss: 0.8771... 0.1198 sec/batch\n", "Epoch: 13/20... Training Step: 5929... Training loss: 1.0714... 0.1136 sec/batch\n", "Epoch: 13/20... Training Step: 5930... Training loss: 1.1129... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 5931... Training loss: 0.9651... 0.1183 sec/batch\n", "Epoch: 13/20... Training Step: 5932... Training loss: 0.9424... 0.1179 sec/batch\n", "Epoch: 13/20... Training Step: 5933... Training loss: 0.9477... 0.1180 sec/batch\n", "Epoch: 13/20... Training Step: 5934... Training loss: 1.1097... 0.1151 sec/batch\n", "Epoch: 13/20... Training Step: 5935... Training loss: 0.9688... 0.1147 sec/batch\n", "Epoch: 13/20... Training Step: 5936... Training loss: 1.2317... 0.1178 sec/batch\n", "Epoch: 13/20... Training Step: 5937... Training loss: 1.0855... 0.1152 sec/batch\n", "Epoch: 13/20... Training Step: 5938... Training loss: 1.1371... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 5939... Training loss: 0.9546... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 5940... Training loss: 1.1839... 0.1192 sec/batch\n", "Epoch: 13/20... Training Step: 5941... 
Training loss: 0.8850... 0.1167 sec/batch\n", "Epoch: 13/20... Training Step: 5942... Training loss: 1.1405... 0.1197 sec/batch\n", "Epoch: 13/20... Training Step: 5943... Training loss: 1.0854... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5944... Training loss: 1.1355... 0.1174 sec/batch\n", "Epoch: 13/20... Training Step: 5945... Training loss: 1.2111... 0.1100 sec/batch\n", "Epoch: 13/20... Training Step: 5946... Training loss: 1.1404... 0.1176 sec/batch\n", "Epoch: 13/20... Training Step: 5947... Training loss: 0.9584... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5948... Training loss: 1.0545... 0.1143 sec/batch\n", "Epoch: 13/20... Training Step: 5949... Training loss: 0.9521... 0.1215 sec/batch\n", "Epoch: 13/20... Training Step: 5950... Training loss: 1.0845... 0.1172 sec/batch\n", "Epoch: 13/20... Training Step: 5951... Training loss: 1.0280... 0.1159 sec/batch\n", "Epoch: 13/20... Training Step: 5952... Training loss: 1.0984... 0.1151 sec/batch\n", "Epoch: 13/20... Training Step: 5953... Training loss: 1.0157... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 5954... Training loss: 1.0705... 0.1192 sec/batch\n", "Epoch: 13/20... Training Step: 5955... Training loss: 1.0640... 0.1183 sec/batch\n", "Epoch: 13/20... Training Step: 5956... Training loss: 0.9209... 0.1180 sec/batch\n", "Epoch: 13/20... Training Step: 5957... Training loss: 0.9253... 0.1217 sec/batch\n", "Epoch: 13/20... Training Step: 5958... Training loss: 1.0451... 0.1182 sec/batch\n", "Epoch: 13/20... Training Step: 5959... Training loss: 0.9848... 0.1178 sec/batch\n", "Epoch: 13/20... Training Step: 5960... Training loss: 0.9407... 0.1167 sec/batch\n", "Epoch: 13/20... Training Step: 5961... Training loss: 1.0383... 0.1192 sec/batch\n", "Epoch: 13/20... Training Step: 5962... Training loss: 0.9617... 0.1175 sec/batch\n", "Epoch: 13/20... Training Step: 5963... Training loss: 0.8913... 0.1174 sec/batch\n", "Epoch: 13/20... Training Step: 5964... 
Training loss: 1.1068... 0.1166 sec/batch\n", "Epoch: 13/20... Training Step: 5965... Training loss: 1.0062... 0.1122 sec/batch\n", "Epoch: 13/20... Training Step: 5966... Training loss: 0.9666... 0.1164 sec/batch\n", "Epoch: 13/20... Training Step: 5967... Training loss: 1.0783... 0.1218 sec/batch\n", "Epoch: 13/20... Training Step: 5968... Training loss: 1.1321... 0.1156 sec/batch\n", "Epoch: 13/20... Training Step: 5969... Training loss: 0.9649... 0.1140 sec/batch\n", "Epoch: 13/20... Training Step: 5970... Training loss: 0.9317... 0.1174 sec/batch\n", "Epoch: 13/20... Training Step: 5971... Training loss: 0.8931... 0.1165 sec/batch\n", "Epoch: 13/20... Training Step: 5972... Training loss: 1.1227... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 5973... Training loss: 1.0442... 0.1177 sec/batch\n", "Epoch: 13/20... Training Step: 5974... Training loss: 1.2312... 0.1174 sec/batch\n", "Epoch: 13/20... Training Step: 5975... Training loss: 0.8789... 0.1175 sec/batch\n", "Epoch: 13/20... Training Step: 5976... Training loss: 1.2985... 0.1224 sec/batch\n", "Epoch: 13/20... Training Step: 5977... Training loss: 0.9790... 0.1163 sec/batch\n", "Epoch: 13/20... Training Step: 5978... Training loss: 1.0406... 0.1161 sec/batch\n", "Epoch: 13/20... Training Step: 5979... Training loss: 1.0173... 0.1206 sec/batch\n", "Epoch: 13/20... Training Step: 5980... Training loss: 1.0306... 0.1240 sec/batch\n", "Epoch: 13/20... Training Step: 5981... Training loss: 1.1039... 0.1189 sec/batch\n", "Epoch: 13/20... Training Step: 5982... Training loss: 1.0494... 0.1208 sec/batch\n", "Epoch: 13/20... Training Step: 5983... Training loss: 1.1112... 0.1223 sec/batch\n", "Epoch: 13/20... Training Step: 5984... Training loss: 1.1670... 0.1199 sec/batch\n", "Epoch: 13/20... Training Step: 5985... Training loss: 1.1274... 0.1205 sec/batch\n", "Epoch: 13/20... Training Step: 5986... Training loss: 0.9250... 0.1167 sec/batch\n", "Epoch: 13/20... Training Step: 5987... 
Training loss: 1.0573... 0.1177 sec/batch\n", "Epoch: 13/20... Training Step: 5988... Training loss: 1.0105... 0.1152 sec/batch\n", "Epoch: 13/20... Training Step: 5989... Training loss: 1.0946... 0.1193 sec/batch\n", "Epoch: 13/20... Training Step: 5990... Training loss: 1.0256... 0.1187 sec/batch\n", "Epoch: 13/20... Training Step: 5991... Training loss: 0.9351... 0.1171 sec/batch\n", "Epoch: 13/20... Training Step: 5992... Training loss: 1.1021... 0.1180 sec/batch\n", "Epoch: 13/20... Training Step: 5993... Training loss: 1.0737... 0.1163 sec/batch\n", "Epoch: 13/20... Training Step: 5994... Training loss: 1.0272... 0.1166 sec/batch\n", "Epoch: 13/20... Training Step: 5995... Training loss: 1.0123... 0.1195 sec/batch\n", "Epoch: 13/20... Training Step: 5996... Training loss: 0.9407... 0.1186 sec/batch\n", "Epoch: 13/20... Training Step: 5997... Training loss: 0.9677... 0.1187 sec/batch\n", "Epoch: 13/20... Training Step: 5998... Training loss: 1.0411... 0.1166 sec/batch\n", "Epoch: 13/20... Training Step: 5999... Training loss: 1.1877... 0.1220 sec/batch\n", "Epoch: 13/20... Training Step: 6000... Training loss: 0.9704... 0.1181 sec/batch\n", "Epoch: 13/20... Training Step: 6001... Training loss: 1.1624... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 6002... Training loss: 1.1678... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 6003... Training loss: 0.9040... 0.1154 sec/batch\n", "Epoch: 13/20... Training Step: 6004... Training loss: 1.0860... 0.1188 sec/batch\n", "Epoch: 13/20... Training Step: 6005... Training loss: 1.0141... 0.1192 sec/batch\n", "Epoch: 13/20... Training Step: 6006... Training loss: 1.1756... 0.1143 sec/batch\n", "Epoch: 13/20... Training Step: 6007... Training loss: 1.2744... 0.1184 sec/batch\n", "Epoch: 13/20... Training Step: 6008... Training loss: 1.2727... 0.1164 sec/batch\n", "Epoch: 13/20... Training Step: 6009... Training loss: 0.9475... 0.1152 sec/batch\n", "Epoch: 13/20... Training Step: 6010... 
Training loss: 0.9778... 0.1131 sec/batch\n", "Epoch: 13/20... Training Step: 6011... Training loss: 1.0596... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 6012... Training loss: 1.0744... 0.1161 sec/batch\n", "Epoch: 13/20... Training Step: 6013... Training loss: 1.0827... 0.1173 sec/batch\n", "Epoch: 13/20... Training Step: 6014... Training loss: 1.0531... 0.1182 sec/batch\n", "Epoch: 13/20... Training Step: 6015... Training loss: 1.1182... 0.1155 sec/batch\n", "Epoch: 13/20... Training Step: 6016... Training loss: 0.9992... 0.1182 sec/batch\n", "Epoch: 13/20... Training Step: 6017... Training loss: 1.1095... 0.1161 sec/batch\n", "Epoch: 13/20... Training Step: 6018... Training loss: 0.9687... 0.1138 sec/batch\n", "Epoch: 13/20... Training Step: 6019... Training loss: 1.0952... 0.1143 sec/batch\n", "Epoch: 13/20... Training Step: 6020... Training loss: 1.0801... 0.1142 sec/batch\n", "Epoch: 13/20... Training Step: 6021... Training loss: 1.0713... 0.1107 sec/batch\n", "Epoch: 13/20... Training Step: 6022... Training loss: 1.2881... 0.1168 sec/batch\n", "Epoch: 13/20... Training Step: 6023... Training loss: 1.1043... 0.1144 sec/batch\n", "Epoch: 13/20... Training Step: 6024... Training loss: 0.9579... 0.1159 sec/batch\n", "Epoch: 13/20... Training Step: 6025... Training loss: 1.0226... 0.1145 sec/batch\n", "Epoch: 13/20... Training Step: 6026... Training loss: 0.8833... 0.1110 sec/batch\n", "Epoch: 13/20... Training Step: 6027... Training loss: 0.9355... 0.1142 sec/batch\n", "Epoch: 13/20... Training Step: 6028... Training loss: 0.9681... 0.1140 sec/batch\n", "Epoch: 13/20... Training Step: 6029... Training loss: 1.0691... 0.1139 sec/batch\n", "Epoch: 13/20... Training Step: 6030... Training loss: 0.9311... 0.1205 sec/batch\n", "Epoch: 13/20... Training Step: 6031... Training loss: 1.0487... 0.1140 sec/batch\n", "Epoch: 13/20... Training Step: 6032... Training loss: 1.0070... 0.1148 sec/batch\n", "Epoch: 14/20... Training Step: 6033... 
Training loss: 1.2961... 0.1144 sec/batch\n", "Epoch: 14/20... Training Step: 6034... Training loss: 1.1728... 0.1256 sec/batch\n", "Epoch: 14/20... Training Step: 6035... Training loss: 1.1087... 0.1270 sec/batch\n", "Epoch: 14/20... Training Step: 6036... Training loss: 1.1693... 0.1216 sec/batch\n", "Epoch: 14/20... Training Step: 6037... Training loss: 1.2070... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6038... Training loss: 0.9317... 0.1198 sec/batch\n", "Epoch: 14/20... Training Step: 6039... Training loss: 1.1026... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6040... Training loss: 0.9525... 0.1190 sec/batch\n", "Epoch: 14/20... Training Step: 6041... Training loss: 0.9236... 0.1169 sec/batch\n", "Epoch: 14/20... Training Step: 6042... Training loss: 1.1687... 0.1200 sec/batch\n", "Epoch: 14/20... Training Step: 6043... Training loss: 0.9674... 0.1179 sec/batch\n", "Epoch: 14/20... Training Step: 6044... Training loss: 0.9133... 0.1146 sec/batch\n", "Epoch: 14/20... Training Step: 6045... Training loss: 1.3963... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6046... Training loss: 0.8980... 0.1151 sec/batch\n", "Epoch: 14/20... Training Step: 6047... Training loss: 1.2099... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6048... Training loss: 1.0376... 0.1153 sec/batch\n", "Epoch: 14/20... Training Step: 6049... Training loss: 1.0367... 0.1158 sec/batch\n", "Epoch: 14/20... Training Step: 6050... Training loss: 1.1146... 0.1146 sec/batch\n", "Epoch: 14/20... Training Step: 6051... Training loss: 1.0556... 0.1164 sec/batch\n", "Epoch: 14/20... Training Step: 6052... Training loss: 0.9177... 0.1186 sec/batch\n", "Epoch: 14/20... Training Step: 6053... Training loss: 1.0905... 0.1154 sec/batch\n", "Epoch: 14/20... Training Step: 6054... Training loss: 0.9839... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6055... Training loss: 1.1136... 0.1172 sec/batch\n", "Epoch: 14/20... Training Step: 6056... 
Training loss: 0.9829... 0.1164 sec/batch\n", "Epoch: 14/20... Training Step: 6057... Training loss: 1.0527... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6058... Training loss: 1.1337... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6059... Training loss: 1.0706... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6060... Training loss: 1.0465... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6061... Training loss: 0.9616... 0.1205 sec/batch\n", "Epoch: 14/20... Training Step: 6062... Training loss: 1.0266... 0.1177 sec/batch\n", "Epoch: 14/20... Training Step: 6063... Training loss: 0.9469... 0.1182 sec/batch\n", "Epoch: 14/20... Training Step: 6064... Training loss: 0.9679... 0.1202 sec/batch\n", "Epoch: 14/20... Training Step: 6065... Training loss: 0.8196... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6066... Training loss: 0.8530... 0.1177 sec/batch\n", "Epoch: 14/20... Training Step: 6067... Training loss: 0.9440... 0.1133 sec/batch\n", "Epoch: 14/20... Training Step: 6068... Training loss: 0.9537... 0.1164 sec/batch\n", "Epoch: 14/20... Training Step: 6069... Training loss: 1.0813... 0.1194 sec/batch\n", "Epoch: 14/20... Training Step: 6070... Training loss: 0.9330... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6071... Training loss: 1.0285... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6072... Training loss: 1.2696... 0.1117 sec/batch\n", "Epoch: 14/20... Training Step: 6073... Training loss: 0.9434... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6074... Training loss: 1.0306... 0.1209 sec/batch\n", "Epoch: 14/20... Training Step: 6075... Training loss: 1.0857... 0.1173 sec/batch\n", "Epoch: 14/20... Training Step: 6076... Training loss: 0.9824... 0.1162 sec/batch\n", "Epoch: 14/20... Training Step: 6077... Training loss: 1.0387... 0.1185 sec/batch\n", "Epoch: 14/20... Training Step: 6078... Training loss: 0.9971... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6079... 
Training loss: 1.0759... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6080... Training loss: 0.9997... 0.1179 sec/batch\n", "Epoch: 14/20... Training Step: 6081... Training loss: 1.0406... 0.1212 sec/batch\n", "Epoch: 14/20... Training Step: 6082... Training loss: 1.0174... 0.1167 sec/batch\n", "Epoch: 14/20... Training Step: 6083... Training loss: 0.9687... 0.1205 sec/batch\n", "Epoch: 14/20... Training Step: 6084... Training loss: 0.9207... 0.1193 sec/batch\n", "Epoch: 14/20... Training Step: 6085... Training loss: 0.9838... 0.1219 sec/batch\n", "Epoch: 14/20... Training Step: 6086... Training loss: 0.8567... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6087... Training loss: 0.9781... 0.1161 sec/batch\n", "Epoch: 14/20... Training Step: 6088... Training loss: 0.9774... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6089... Training loss: 1.0788... 0.1218 sec/batch\n", "Epoch: 14/20... Training Step: 6090... Training loss: 1.1065... 0.1198 sec/batch\n", "Epoch: 14/20... Training Step: 6091... Training loss: 0.8738... 0.1176 sec/batch\n", "Epoch: 14/20... Training Step: 6092... Training loss: 0.9117... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6093... Training loss: 0.9346... 0.1149 sec/batch\n", "Epoch: 14/20... Training Step: 6094... Training loss: 1.1451... 0.1127 sec/batch\n", "Epoch: 14/20... Training Step: 6095... Training loss: 0.9118... 0.1138 sec/batch\n", "Epoch: 14/20... Training Step: 6096... Training loss: 1.1009... 0.1167 sec/batch\n", "Epoch: 14/20... Training Step: 6097... Training loss: 0.7968... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6098... Training loss: 1.0258... 0.1182 sec/batch\n", "Epoch: 14/20... Training Step: 6099... Training loss: 0.9808... 0.1168 sec/batch\n", "Epoch: 14/20... Training Step: 6100... Training loss: 1.1356... 0.1202 sec/batch\n", "Epoch: 14/20... Training Step: 6101... Training loss: 0.9594... 0.1139 sec/batch\n", "Epoch: 14/20... Training Step: 6102... 
Training loss: 1.0993... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6103... Training loss: 1.1937... 0.1212 sec/batch\n", "Epoch: 14/20... Training Step: 6104... Training loss: 0.8838... 0.1162 sec/batch\n", "Epoch: 14/20... Training Step: 6105... Training loss: 1.0027... 0.1207 sec/batch\n", "Epoch: 14/20... Training Step: 6106... Training loss: 0.8681... 0.1205 sec/batch\n", "Epoch: 14/20... Training Step: 6107... Training loss: 1.1555... 0.1177 sec/batch\n", "Epoch: 14/20... Training Step: 6108... Training loss: 0.9221... 0.1210 sec/batch\n", "Epoch: 14/20... Training Step: 6109... Training loss: 0.9617... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6110... Training loss: 1.0764... 0.1166 sec/batch\n", "Epoch: 14/20... Training Step: 6111... Training loss: 1.1041... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6112... Training loss: 0.8907... 0.1179 sec/batch\n", "Epoch: 14/20... Training Step: 6113... Training loss: 1.1636... 0.1211 sec/batch\n", "Epoch: 14/20... Training Step: 6114... Training loss: 1.0236... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6115... Training loss: 0.9199... 0.1151 sec/batch\n", "Epoch: 14/20... Training Step: 6116... Training loss: 1.0834... 0.1130 sec/batch\n", "Epoch: 14/20... Training Step: 6117... Training loss: 0.9698... 0.1138 sec/batch\n", "Epoch: 14/20... Training Step: 6118... Training loss: 1.2158... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6119... Training loss: 0.8816... 0.1087 sec/batch\n", "Epoch: 14/20... Training Step: 6120... Training loss: 1.2221... 0.1152 sec/batch\n", "Epoch: 14/20... Training Step: 6121... Training loss: 1.1928... 0.1118 sec/batch\n", "Epoch: 14/20... Training Step: 6122... Training loss: 1.0258... 0.1153 sec/batch\n", "Epoch: 14/20... Training Step: 6123... Training loss: 1.1122... 0.1162 sec/batch\n", "Epoch: 14/20... Training Step: 6124... Training loss: 1.1394... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6125... 
Training loss: 0.8936... 0.1224 sec/batch\n", "Epoch: 14/20... Training Step: 6126... Training loss: 1.2011... 0.1167 sec/batch\n", "Epoch: 14/20... Training Step: 6127... Training loss: 1.0308... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6128... Training loss: 1.0308... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6129... Training loss: 1.0679... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6130... Training loss: 1.0780... 0.1175 sec/batch\n", "Epoch: 14/20... Training Step: 6131... Training loss: 1.0733... 0.1178 sec/batch\n", "Epoch: 14/20... Training Step: 6132... Training loss: 1.0537... 0.1163 sec/batch\n", "Epoch: 14/20... Training Step: 6133... Training loss: 1.1148... 0.1210 sec/batch\n", "Epoch: 14/20... Training Step: 6134... Training loss: 1.1783... 0.1220 sec/batch\n", "Epoch: 14/20... Training Step: 6135... Training loss: 1.1921... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6136... Training loss: 1.1196... 0.1178 sec/batch\n", "Epoch: 14/20... Training Step: 6137... Training loss: 1.2029... 0.1150 sec/batch\n", "Epoch: 14/20... Training Step: 6138... Training loss: 1.0645... 0.1160 sec/batch\n", "Epoch: 14/20... Training Step: 6139... Training loss: 0.9903... 0.1213 sec/batch\n", "Epoch: 14/20... Training Step: 6140... Training loss: 1.1413... 0.1199 sec/batch\n", "Epoch: 14/20... Training Step: 6141... Training loss: 1.0815... 0.1171 sec/batch\n", "Epoch: 14/20... Training Step: 6142... Training loss: 0.9366... 0.1187 sec/batch\n", "Epoch: 14/20... Training Step: 6143... Training loss: 0.9656... 0.1200 sec/batch\n", "Epoch: 14/20... Training Step: 6144... Training loss: 0.9758... 0.1143 sec/batch\n", "Epoch: 14/20... Training Step: 6145... Training loss: 1.0252... 0.1176 sec/batch\n", "Epoch: 14/20... Training Step: 6146... Training loss: 1.2684... 0.1213 sec/batch\n", "Epoch: 14/20... Training Step: 6147... Training loss: 1.1370... 0.1188 sec/batch\n", "Epoch: 14/20... Training Step: 6148... 
Training loss: 0.9744... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6149... Training loss: 1.0066... 0.1210 sec/batch\n", "Epoch: 14/20... Training Step: 6150... Training loss: 0.9685... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6151... Training loss: 1.0640... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6152... Training loss: 0.9032... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6153... Training loss: 1.1268... 0.1213 sec/batch\n", "Epoch: 14/20... Training Step: 6154... Training loss: 1.1569... 0.1163 sec/batch\n", "Epoch: 14/20... Training Step: 6155... Training loss: 1.0851... 0.1180 sec/batch\n", "Epoch: 14/20... Training Step: 6156... Training loss: 1.0266... 0.1135 sec/batch\n", "Epoch: 14/20... Training Step: 6157... Training loss: 1.0239... 0.1167 sec/batch\n", "Epoch: 14/20... Training Step: 6158... Training loss: 0.9686... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6159... Training loss: 1.0969... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6160... Training loss: 1.1447... 0.1172 sec/batch\n", "Epoch: 14/20... Training Step: 6161... Training loss: 1.0187... 0.1186 sec/batch\n", "Epoch: 14/20... Training Step: 6162... Training loss: 1.0395... 0.1216 sec/batch\n", "Epoch: 14/20... Training Step: 6163... Training loss: 1.0642... 0.1147 sec/batch\n", "Epoch: 14/20... Training Step: 6164... Training loss: 1.0620... 0.1139 sec/batch\n", "Epoch: 14/20... Training Step: 6165... Training loss: 0.9606... 0.1188 sec/batch\n", "Epoch: 14/20... Training Step: 6166... Training loss: 1.1632... 0.1213 sec/batch\n", "Epoch: 14/20... Training Step: 6167... Training loss: 1.0257... 0.1152 sec/batch\n", "Epoch: 14/20... Training Step: 6168... Training loss: 1.0209... 0.1211 sec/batch\n", "Epoch: 14/20... Training Step: 6169... Training loss: 0.9277... 0.1185 sec/batch\n", "Epoch: 14/20... Training Step: 6170... Training loss: 1.0728... 0.1188 sec/batch\n", "Epoch: 14/20... Training Step: 6171... 
Training loss: 1.0634... 0.1155 sec/batch\n", "Epoch: 14/20... Training Step: 6172... Training loss: 0.9896... 0.1169 sec/batch\n", "Epoch: 14/20... Training Step: 6173... Training loss: 0.8330... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6174... Training loss: 0.9357... 0.1235 sec/batch\n", "Epoch: 14/20... Training Step: 6175... Training loss: 1.0683... 0.1239 sec/batch\n", "Epoch: 14/20... Training Step: 6176... Training loss: 1.1600... 0.1229 sec/batch\n", "Epoch: 14/20... Training Step: 6177... Training loss: 0.9302... 0.1141 sec/batch\n", "Epoch: 14/20... Training Step: 6178... Training loss: 0.9992... 0.1108 sec/batch\n", "Epoch: 14/20... Training Step: 6179... Training loss: 1.0094... 0.1130 sec/batch\n", "Epoch: 14/20... Training Step: 6180... Training loss: 0.9302... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6181... Training loss: 1.0559... 0.1161 sec/batch\n", "Epoch: 14/20... Training Step: 6182... Training loss: 1.0481... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6183... Training loss: 0.9837... 0.1199 sec/batch\n", "Epoch: 14/20... Training Step: 6184... Training loss: 1.0992... 0.1191 sec/batch\n", "Epoch: 14/20... Training Step: 6185... Training loss: 1.1908... 0.1204 sec/batch\n", "Epoch: 14/20... Training Step: 6186... Training loss: 1.0263... 0.1171 sec/batch\n", "Epoch: 14/20... Training Step: 6187... Training loss: 1.0046... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6188... Training loss: 0.9674... 0.1200 sec/batch\n", "Epoch: 14/20... Training Step: 6189... Training loss: 1.0302... 0.1185 sec/batch\n", "Epoch: 14/20... Training Step: 6190... Training loss: 0.9790... 0.1168 sec/batch\n", "Epoch: 14/20... Training Step: 6191... Training loss: 0.9789... 0.1187 sec/batch\n", "Epoch: 14/20... Training Step: 6192... Training loss: 1.0191... 0.1227 sec/batch\n", "Epoch: 14/20... Training Step: 6193... Training loss: 1.0524... 0.1194 sec/batch\n", "Epoch: 14/20... Training Step: 6194... 
Training loss: 0.9542... 0.1199 sec/batch\n", "Epoch: 14/20... Training Step: 6195... Training loss: 1.2836... 0.1204 sec/batch\n", "Epoch: 14/20... Training Step: 6196... Training loss: 0.9629... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6197... Training loss: 0.9843... 0.1209 sec/batch\n", "Epoch: 14/20... Training Step: 6198... Training loss: 1.0029... 0.1150 sec/batch\n", "Epoch: 14/20... Training Step: 6199... Training loss: 1.0015... 0.1198 sec/batch\n", "Epoch: 14/20... Training Step: 6200... Training loss: 1.2660... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6201... Training loss: 1.0076... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6202... Training loss: 1.1125... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6203... Training loss: 1.1167... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6204... Training loss: 1.0816... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6205... Training loss: 0.8679... 0.1208 sec/batch\n", "Epoch: 14/20... Training Step: 6206... Training loss: 1.0603... 0.1167 sec/batch\n", "Epoch: 14/20... Training Step: 6207... Training loss: 1.0702... 0.1218 sec/batch\n", "Epoch: 14/20... Training Step: 6208... Training loss: 0.9660... 0.1140 sec/batch\n", "Epoch: 14/20... Training Step: 6209... Training loss: 0.9270... 0.1191 sec/batch\n", "Epoch: 14/20... Training Step: 6210... Training loss: 1.2484... 0.1127 sec/batch\n", "Epoch: 14/20... Training Step: 6211... Training loss: 0.9675... 0.1150 sec/batch\n", "Epoch: 14/20... Training Step: 6212... Training loss: 1.0959... 0.1187 sec/batch\n", "Epoch: 14/20... Training Step: 6213... Training loss: 0.8406... 0.1153 sec/batch\n", "Epoch: 14/20... Training Step: 6214... Training loss: 1.2503... 0.1206 sec/batch\n", "Epoch: 14/20... Training Step: 6215... Training loss: 1.1344... 0.1197 sec/batch\n", "Epoch: 14/20... Training Step: 6216... Training loss: 0.9361... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6217... 
Training loss: 1.0893... 0.1223 sec/batch\n", "Epoch: 14/20... Training Step: 6218... Training loss: 1.0839... 0.1162 sec/batch\n", "Epoch: 14/20... Training Step: 6219... Training loss: 1.1129... 0.1154 sec/batch\n", "Epoch: 14/20... Training Step: 6220... Training loss: 0.9389... 0.1139 sec/batch\n", "Epoch: 14/20... Training Step: 6221... Training loss: 1.1668... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6222... Training loss: 0.9710... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6223... Training loss: 0.9363... 0.1189 sec/batch\n", "Epoch: 14/20... Training Step: 6224... Training loss: 1.0183... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6225... Training loss: 1.0922... 0.1193 sec/batch\n", "Epoch: 14/20... Training Step: 6226... Training loss: 1.0419... 0.1165 sec/batch\n", "Epoch: 14/20... Training Step: 6227... Training loss: 1.0978... 0.1178 sec/batch\n", "Epoch: 14/20... Training Step: 6228... Training loss: 0.9899... 0.1179 sec/batch\n", "Epoch: 14/20... Training Step: 6229... Training loss: 1.0524... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6230... Training loss: 1.1951... 0.1189 sec/batch\n", "Epoch: 14/20... Training Step: 6231... Training loss: 0.7932... 0.1188 sec/batch\n", "Epoch: 14/20... Training Step: 6232... Training loss: 1.0639... 0.1161 sec/batch\n", "Epoch: 14/20... Training Step: 6233... Training loss: 1.0269... 0.1168 sec/batch\n", "Epoch: 14/20... Training Step: 6234... Training loss: 1.2400... 0.1179 sec/batch\n", "Epoch: 14/20... Training Step: 6235... Training loss: 1.0549... 0.1171 sec/batch\n", "Epoch: 14/20... Training Step: 6236... Training loss: 1.2438... 0.1163 sec/batch\n", "Epoch: 14/20... Training Step: 6237... Training loss: 1.0145... 0.1249 sec/batch\n", "Epoch: 14/20... Training Step: 6238... Training loss: 0.9624... 0.1336 sec/batch\n", "Epoch: 14/20... Training Step: 6239... Training loss: 1.0088... 0.1172 sec/batch\n", "Epoch: 14/20... Training Step: 6240... 
Training loss: 1.0412... 0.1144 sec/batch\n", "Epoch: 14/20... Training Step: 6241... Training loss: 1.1553... 0.1177 sec/batch\n", "Epoch: 14/20... Training Step: 6242... Training loss: 0.8003... 0.1202 sec/batch\n", "Epoch: 14/20... Training Step: 6243... Training loss: 1.0273... 0.1169 sec/batch\n", "Epoch: 14/20... Training Step: 6244... Training loss: 1.0985... 0.1212 sec/batch\n", "Epoch: 14/20... Training Step: 6245... Training loss: 1.0781... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6246... Training loss: 1.0827... 0.1215 sec/batch\n", "Epoch: 14/20... Training Step: 6247... Training loss: 1.1260... 0.1295 sec/batch\n", "Epoch: 14/20... Training Step: 6248... Training loss: 0.9234... 0.1228 sec/batch\n", "Epoch: 14/20... Training Step: 6249... Training loss: 1.1417... 0.1244 sec/batch\n", "Epoch: 14/20... Training Step: 6250... Training loss: 1.1762... 0.1239 sec/batch\n", "Epoch: 14/20... Training Step: 6251... Training loss: 1.2129... 0.1265 sec/batch\n", "Epoch: 14/20... Training Step: 6252... Training loss: 1.1257... 0.1361 sec/batch\n", "Epoch: 14/20... Training Step: 6253... Training loss: 0.8510... 0.1369 sec/batch\n", "Epoch: 14/20... Training Step: 6254... Training loss: 1.3085... 0.1266 sec/batch\n", "Epoch: 14/20... Training Step: 6255... Training loss: 1.0438... 0.1361 sec/batch\n", "Epoch: 14/20... Training Step: 6256... Training loss: 1.2506... 0.1340 sec/batch\n", "Epoch: 14/20... Training Step: 6257... Training loss: 1.1083... 0.1230 sec/batch\n", "Epoch: 14/20... Training Step: 6258... Training loss: 1.1652... 0.1295 sec/batch\n", "Epoch: 14/20... Training Step: 6259... Training loss: 1.3196... 0.1238 sec/batch\n", "Epoch: 14/20... Training Step: 6260... Training loss: 1.0479... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6261... Training loss: 1.1389... 0.1175 sec/batch\n", "Epoch: 14/20... Training Step: 6262... Training loss: 0.9657... 0.1145 sec/batch\n", "Epoch: 14/20... Training Step: 6263... 
Training loss: 1.1744... 0.1226 sec/batch\n", "Epoch: 14/20... Training Step: 6264... Training loss: 1.1350... 0.1188 sec/batch\n", "Epoch: 14/20... Training Step: 6265... Training loss: 1.2018... 0.1206 sec/batch\n", "Epoch: 14/20... Training Step: 6266... Training loss: 1.1414... 0.1369 sec/batch\n", "Epoch: 14/20... Training Step: 6267... Training loss: 1.2602... 0.1540 sec/batch\n", "Epoch: 14/20... Training Step: 6268... Training loss: 1.0825... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6269... Training loss: 1.1693... 0.1331 sec/batch\n", "Epoch: 14/20... Training Step: 6270... Training loss: 0.9710... 0.1227 sec/batch\n", "Epoch: 14/20... Training Step: 6271... Training loss: 1.0783... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6272... Training loss: 1.2176... 0.1224 sec/batch\n", "Epoch: 14/20... Training Step: 6273... Training loss: 1.1114... 0.1201 sec/batch\n", "Epoch: 14/20... Training Step: 6274... Training loss: 1.0065... 0.1206 sec/batch\n", "Epoch: 14/20... Training Step: 6275... Training loss: 1.1669... 0.1158 sec/batch\n", "Epoch: 14/20... Training Step: 6276... Training loss: 1.0916... 0.1188 sec/batch\n", "Epoch: 14/20... Training Step: 6277... Training loss: 1.0807... 0.1212 sec/batch\n", "Epoch: 14/20... Training Step: 6278... Training loss: 0.9392... 0.1191 sec/batch\n", "Epoch: 14/20... Training Step: 6279... Training loss: 1.1110... 0.1176 sec/batch\n", "Epoch: 14/20... Training Step: 6280... Training loss: 1.1069... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6281... Training loss: 1.0992... 0.1165 sec/batch\n", "Epoch: 14/20... Training Step: 6282... Training loss: 1.1012... 0.1182 sec/batch\n", "Epoch: 14/20... Training Step: 6283... Training loss: 1.0689... 0.1211 sec/batch\n", "Epoch: 14/20... Training Step: 6284... Training loss: 1.0458... 0.1186 sec/batch\n", "Epoch: 14/20... Training Step: 6285... Training loss: 1.0230... 0.1206 sec/batch\n", "Epoch: 14/20... Training Step: 6286... 
Training loss: 1.0363... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6287... Training loss: 1.0226... 0.1220 sec/batch\n", "Epoch: 14/20... Training Step: 6288... Training loss: 1.1075... 0.1196 sec/batch\n", "Epoch: 14/20... Training Step: 6289... Training loss: 1.1400... 0.1209 sec/batch\n", "Epoch: 14/20... Training Step: 6290... Training loss: 0.9389... 0.1185 sec/batch\n", "Epoch: 14/20... Training Step: 6291... Training loss: 0.9802... 0.1171 sec/batch\n", "Epoch: 14/20... Training Step: 6292... Training loss: 1.0128... 0.1190 sec/batch\n", "Epoch: 14/20... Training Step: 6293... Training loss: 1.0792... 0.1157 sec/batch\n", "Epoch: 14/20... Training Step: 6294... Training loss: 1.1787... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6295... Training loss: 0.9909... 0.1162 sec/batch\n", "Epoch: 14/20... Training Step: 6296... Training loss: 1.1521... 0.1149 sec/batch\n", "Epoch: 14/20... Training Step: 6297... Training loss: 1.0679... 0.1141 sec/batch\n", "Epoch: 14/20... Training Step: 6298... Training loss: 0.9963... 0.1187 sec/batch\n", "Epoch: 14/20... Training Step: 6299... Training loss: 1.2207... 0.1210 sec/batch\n", "Epoch: 14/20... Training Step: 6300... Training loss: 1.1573... 0.1110 sec/batch\n", "Epoch: 14/20... Training Step: 6301... Training loss: 1.1256... 0.1201 sec/batch\n", "Epoch: 14/20... Training Step: 6302... Training loss: 1.0474... 0.1228 sec/batch\n", "Epoch: 14/20... Training Step: 6303... Training loss: 1.0965... 0.1270 sec/batch\n", "Epoch: 14/20... Training Step: 6304... Training loss: 1.1614... 0.1226 sec/batch\n", "Epoch: 14/20... Training Step: 6305... Training loss: 1.2317... 0.1272 sec/batch\n", "Epoch: 14/20... Training Step: 6306... Training loss: 1.1865... 0.1188 sec/batch\n", "Epoch: 14/20... Training Step: 6307... Training loss: 1.0834... 0.1234 sec/batch\n", "Epoch: 14/20... Training Step: 6308... Training loss: 1.0725... 0.1169 sec/batch\n", "Epoch: 14/20... Training Step: 6309... 
Training loss: 0.9982... 0.1180 sec/batch\n", "Epoch: 14/20... Training Step: 6310... Training loss: 1.3003... 0.1265 sec/batch\n", "Epoch: 14/20... Training Step: 6311... Training loss: 1.1485... 0.1182 sec/batch\n", "Epoch: 14/20... Training Step: 6312... Training loss: 1.0416... 0.1214 sec/batch\n", "Epoch: 14/20... Training Step: 6313... Training loss: 1.0824... 0.1286 sec/batch\n", "Epoch: 14/20... Training Step: 6314... Training loss: 1.0028... 0.1185 sec/batch\n", "Epoch: 14/20... Training Step: 6315... Training loss: 0.9683... 0.1183 sec/batch\n", "Epoch: 14/20... Training Step: 6316... Training loss: 1.0979... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6317... Training loss: 0.9491... 0.1171 sec/batch\n", "Epoch: 14/20... Training Step: 6318... Training loss: 0.9500... 0.1168 sec/batch\n", "Epoch: 14/20... Training Step: 6319... Training loss: 1.0420... 0.1141 sec/batch\n", "Epoch: 14/20... Training Step: 6320... Training loss: 1.0673... 0.1160 sec/batch\n", "Epoch: 14/20... Training Step: 6321... Training loss: 1.1374... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6322... Training loss: 1.1389... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6323... Training loss: 1.0871... 0.1173 sec/batch\n", "Epoch: 14/20... Training Step: 6324... Training loss: 1.0488... 0.1183 sec/batch\n", "Epoch: 14/20... Training Step: 6325... Training loss: 0.9894... 0.1177 sec/batch\n", "Epoch: 14/20... Training Step: 6326... Training loss: 1.1681... 0.1194 sec/batch\n", "Epoch: 14/20... Training Step: 6327... Training loss: 1.1304... 0.1202 sec/batch\n", "Epoch: 14/20... Training Step: 6328... Training loss: 1.3254... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6329... Training loss: 0.9449... 0.1183 sec/batch\n", "Epoch: 14/20... Training Step: 6330... Training loss: 1.0695... 0.1198 sec/batch\n", "Epoch: 14/20... Training Step: 6331... Training loss: 0.9940... 0.1193 sec/batch\n", "Epoch: 14/20... Training Step: 6332... 
Training loss: 1.1792... 0.1185 sec/batch\n", "Epoch: 14/20... Training Step: 6333... Training loss: 1.1351... 0.1210 sec/batch\n", "Epoch: 14/20... Training Step: 6334... Training loss: 0.9843... 0.1154 sec/batch\n", "Epoch: 14/20... Training Step: 6335... Training loss: 0.8815... 0.1139 sec/batch\n", "Epoch: 14/20... Training Step: 6336... Training loss: 1.3051... 0.1193 sec/batch\n", "Epoch: 14/20... Training Step: 6337... Training loss: 1.0715... 0.1216 sec/batch\n", "Epoch: 14/20... Training Step: 6338... Training loss: 1.1297... 0.1175 sec/batch\n", "Epoch: 14/20... Training Step: 6339... Training loss: 1.1352... 0.1190 sec/batch\n", "Epoch: 14/20... Training Step: 6340... Training loss: 1.3274... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6341... Training loss: 0.9987... 0.1173 sec/batch\n", "Epoch: 14/20... Training Step: 6342... Training loss: 1.1374... 0.1133 sec/batch\n", "Epoch: 14/20... Training Step: 6343... Training loss: 1.0737... 0.1113 sec/batch\n", "Epoch: 14/20... Training Step: 6344... Training loss: 0.9686... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6345... Training loss: 0.9744... 0.1171 sec/batch\n", "Epoch: 14/20... Training Step: 6346... Training loss: 0.9467... 0.1189 sec/batch\n", "Epoch: 14/20... Training Step: 6347... Training loss: 0.9367... 0.1210 sec/batch\n", "Epoch: 14/20... Training Step: 6348... Training loss: 0.9322... 0.1279 sec/batch\n", "Epoch: 14/20... Training Step: 6349... Training loss: 0.9013... 0.1295 sec/batch\n", "Epoch: 14/20... Training Step: 6350... Training loss: 0.8782... 0.1256 sec/batch\n", "Epoch: 14/20... Training Step: 6351... Training loss: 1.1037... 0.1288 sec/batch\n", "Epoch: 14/20... Training Step: 6352... Training loss: 0.9403... 0.1248 sec/batch\n", "Epoch: 14/20... Training Step: 6353... Training loss: 0.9096... 0.1154 sec/batch\n", "Epoch: 14/20... Training Step: 6354... Training loss: 1.2227... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6355... 
Training loss: 0.9494... 0.1189 sec/batch\n", "Epoch: 14/20... Training Step: 6356... Training loss: 1.0220... 0.1148 sec/batch\n", "Epoch: 14/20... Training Step: 6357... Training loss: 0.8881... 0.1171 sec/batch\n", "Epoch: 14/20... Training Step: 6358... Training loss: 0.7877... 0.1223 sec/batch\n", "Epoch: 14/20... Training Step: 6359... Training loss: 0.9653... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6360... Training loss: 0.8959... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6361... Training loss: 1.0951... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6362... Training loss: 1.1210... 0.1150 sec/batch\n", "Epoch: 14/20... Training Step: 6363... Training loss: 1.0734... 0.1207 sec/batch\n", "Epoch: 14/20... Training Step: 6364... Training loss: 1.0055... 0.1146 sec/batch\n", "Epoch: 14/20... Training Step: 6365... Training loss: 0.8983... 0.1206 sec/batch\n", "Epoch: 14/20... Training Step: 6366... Training loss: 1.0385... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6367... Training loss: 1.0853... 0.1182 sec/batch\n", "Epoch: 14/20... Training Step: 6368... Training loss: 0.9644... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6369... Training loss: 0.9516... 0.1164 sec/batch\n", "Epoch: 14/20... Training Step: 6370... Training loss: 0.8981... 0.1115 sec/batch\n", "Epoch: 14/20... Training Step: 6371... Training loss: 1.0400... 0.1211 sec/batch\n", "Epoch: 14/20... Training Step: 6372... Training loss: 0.9828... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6373... Training loss: 0.9521... 0.1190 sec/batch\n", "Epoch: 14/20... Training Step: 6374... Training loss: 1.0644... 0.1169 sec/batch\n", "Epoch: 14/20... Training Step: 6375... Training loss: 0.8286... 0.1205 sec/batch\n", "Epoch: 14/20... Training Step: 6376... Training loss: 1.1601... 0.1130 sec/batch\n", "Epoch: 14/20... Training Step: 6377... Training loss: 0.9789... 0.1157 sec/batch\n", "Epoch: 14/20... Training Step: 6378... 
Training loss: 0.8578... 0.1205 sec/batch\n", "Epoch: 14/20... Training Step: 6379... Training loss: 0.9312... 0.1196 sec/batch\n", "Epoch: 14/20... Training Step: 6380... Training loss: 1.2524... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6381... Training loss: 0.9438... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6382... Training loss: 1.0497... 0.1208 sec/batch\n", "Epoch: 14/20... Training Step: 6383... Training loss: 0.9257... 0.1214 sec/batch\n", "Epoch: 14/20... Training Step: 6384... Training loss: 1.0236... 0.1141 sec/batch\n", "Epoch: 14/20... Training Step: 6385... Training loss: 0.8407... 0.1218 sec/batch\n", "Epoch: 14/20... Training Step: 6386... Training loss: 0.8575... 0.1169 sec/batch\n", "Epoch: 14/20... Training Step: 6387... Training loss: 1.0574... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6388... Training loss: 1.0353... 0.1215 sec/batch\n", "Epoch: 14/20... Training Step: 6389... Training loss: 0.8543... 0.1173 sec/batch\n", "Epoch: 14/20... Training Step: 6390... Training loss: 1.0560... 0.1161 sec/batch\n", "Epoch: 14/20... Training Step: 6391... Training loss: 1.2402... 0.1232 sec/batch\n", "Epoch: 14/20... Training Step: 6392... Training loss: 0.8486... 0.1186 sec/batch\n", "Epoch: 14/20... Training Step: 6393... Training loss: 1.0563... 0.1139 sec/batch\n", "Epoch: 14/20... Training Step: 6394... Training loss: 1.1088... 0.1153 sec/batch\n", "Epoch: 14/20... Training Step: 6395... Training loss: 0.9646... 0.1162 sec/batch\n", "Epoch: 14/20... Training Step: 6396... Training loss: 0.9813... 0.1224 sec/batch\n", "Epoch: 14/20... Training Step: 6397... Training loss: 0.9595... 0.1224 sec/batch\n", "Epoch: 14/20... Training Step: 6398... Training loss: 1.0756... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6399... Training loss: 0.9937... 0.1155 sec/batch\n", "Epoch: 14/20... Training Step: 6400... Training loss: 1.0924... 0.1140 sec/batch\n", "Epoch: 14/20... Training Step: 6401... 
Training loss: 1.0441... 0.1198 sec/batch\n", "Epoch: 14/20... Training Step: 6402... Training loss: 1.1455... 0.1207 sec/batch\n", "Epoch: 14/20... Training Step: 6403... Training loss: 0.9106... 0.1172 sec/batch\n", "Epoch: 14/20... Training Step: 6404... Training loss: 1.0306... 0.1218 sec/batch\n", "Epoch: 14/20... Training Step: 6405... Training loss: 0.9194... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6406... Training loss: 1.0911... 0.1306 sec/batch\n", "Epoch: 14/20... Training Step: 6407... Training loss: 1.0538... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6408... Training loss: 1.0753... 0.1147 sec/batch\n", "Epoch: 14/20... Training Step: 6409... Training loss: 1.1767... 0.1156 sec/batch\n", "Epoch: 14/20... Training Step: 6410... Training loss: 1.0190... 0.1141 sec/batch\n", "Epoch: 14/20... Training Step: 6411... Training loss: 1.1205... 0.1161 sec/batch\n", "Epoch: 14/20... Training Step: 6412... Training loss: 1.0595... 0.1207 sec/batch\n", "Epoch: 14/20... Training Step: 6413... Training loss: 0.9372... 0.1201 sec/batch\n", "Epoch: 14/20... Training Step: 6414... Training loss: 0.9471... 0.1190 sec/batch\n", "Epoch: 14/20... Training Step: 6415... Training loss: 0.9019... 0.1211 sec/batch\n", "Epoch: 14/20... Training Step: 6416... Training loss: 0.9762... 0.1172 sec/batch\n", "Epoch: 14/20... Training Step: 6417... Training loss: 0.9473... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6418... Training loss: 0.9947... 0.1159 sec/batch\n", "Epoch: 14/20... Training Step: 6419... Training loss: 1.0144... 0.1163 sec/batch\n", "Epoch: 14/20... Training Step: 6420... Training loss: 1.0289... 0.1140 sec/batch\n", "Epoch: 14/20... Training Step: 6421... Training loss: 0.8719... 0.1179 sec/batch\n", "Epoch: 14/20... Training Step: 6422... Training loss: 0.9312... 0.1203 sec/batch\n", "Epoch: 14/20... Training Step: 6423... Training loss: 1.0465... 0.1202 sec/batch\n", "Epoch: 14/20... Training Step: 6424... 
Training loss: 0.9681... 0.1165 sec/batch\n", "Epoch: 14/20... Training Step: 6425... Training loss: 1.1075... 0.1168 sec/batch\n", "Epoch: 14/20... Training Step: 6426... Training loss: 1.0145... 0.1160 sec/batch\n", "Epoch: 14/20... Training Step: 6427... Training loss: 0.9441... 0.1116 sec/batch\n", "Epoch: 14/20... Training Step: 6428... Training loss: 1.1169... 0.1164 sec/batch\n", "Epoch: 14/20... Training Step: 6429... Training loss: 0.9344... 0.1209 sec/batch\n", "Epoch: 14/20... Training Step: 6430... Training loss: 0.9738... 0.1186 sec/batch\n", "Epoch: 14/20... Training Step: 6431... Training loss: 1.0245... 0.1187 sec/batch\n", "Epoch: 14/20... Training Step: 6432... Training loss: 1.1152... 0.1128 sec/batch\n", "Epoch: 14/20... Training Step: 6433... Training loss: 0.9822... 0.1116 sec/batch\n", "Epoch: 14/20... Training Step: 6434... Training loss: 1.0283... 0.1190 sec/batch\n", "Epoch: 14/20... Training Step: 6435... Training loss: 0.9509... 0.1182 sec/batch\n", "Epoch: 14/20... Training Step: 6436... Training loss: 1.0086... 0.1155 sec/batch\n", "Epoch: 14/20... Training Step: 6437... Training loss: 1.0257... 0.1265 sec/batch\n", "Epoch: 14/20... Training Step: 6438... Training loss: 1.2245... 0.1299 sec/batch\n", "Epoch: 14/20... Training Step: 6439... Training loss: 0.9468... 0.1213 sec/batch\n", "Epoch: 14/20... Training Step: 6440... Training loss: 1.3210... 0.1163 sec/batch\n", "Epoch: 14/20... Training Step: 6441... Training loss: 0.8813... 0.1141 sec/batch\n", "Epoch: 14/20... Training Step: 6442... Training loss: 0.9792... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6443... Training loss: 0.9356... 0.1190 sec/batch\n", "Epoch: 14/20... Training Step: 6444... Training loss: 1.0518... 0.1205 sec/batch\n", "Epoch: 14/20... Training Step: 6445... Training loss: 1.0451... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6446... Training loss: 1.0324... 0.1154 sec/batch\n", "Epoch: 14/20... Training Step: 6447... 
Training loss: 1.0358... 0.1200 sec/batch\n", "Epoch: 14/20... Training Step: 6448... Training loss: 1.1485... 0.1135 sec/batch\n", "Epoch: 14/20... Training Step: 6449... Training loss: 1.0353... 0.1158 sec/batch\n", "Epoch: 14/20... Training Step: 6450... Training loss: 0.9186... 0.1163 sec/batch\n", "Epoch: 14/20... Training Step: 6451... Training loss: 0.8943... 0.1189 sec/batch\n", "Epoch: 14/20... Training Step: 6452... Training loss: 0.8637... 0.1210 sec/batch\n", "Epoch: 14/20... Training Step: 6453... Training loss: 1.0143... 0.1168 sec/batch\n", "Epoch: 14/20... Training Step: 6454... Training loss: 1.0244... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6455... Training loss: 0.9745... 0.1160 sec/batch\n", "Epoch: 14/20... Training Step: 6456... Training loss: 1.1139... 0.1170 sec/batch\n", "Epoch: 14/20... Training Step: 6457... Training loss: 0.9066... 0.1202 sec/batch\n", "Epoch: 14/20... Training Step: 6458... Training loss: 1.0449... 0.1115 sec/batch\n", "Epoch: 14/20... Training Step: 6459... Training loss: 1.0001... 0.1183 sec/batch\n", "Epoch: 14/20... Training Step: 6460... Training loss: 0.9708... 0.1201 sec/batch\n", "Epoch: 14/20... Training Step: 6461... Training loss: 0.8630... 0.1193 sec/batch\n", "Epoch: 14/20... Training Step: 6462... Training loss: 0.9434... 0.1196 sec/batch\n", "Epoch: 14/20... Training Step: 6463... Training loss: 1.1453... 0.1181 sec/batch\n", "Epoch: 14/20... Training Step: 6464... Training loss: 1.0092... 0.1140 sec/batch\n", "Epoch: 14/20... Training Step: 6465... Training loss: 1.2091... 0.1195 sec/batch\n", "Epoch: 14/20... Training Step: 6466... Training loss: 1.1181... 0.1139 sec/batch\n", "Epoch: 14/20... Training Step: 6467... Training loss: 0.9178... 0.1216 sec/batch\n", "Epoch: 14/20... Training Step: 6468... Training loss: 1.0125... 0.1193 sec/batch\n", "Epoch: 14/20... Training Step: 6469... Training loss: 0.9544... 0.1185 sec/batch\n", "Epoch: 14/20... Training Step: 6470... 
Training loss: 1.0264... 0.1214 sec/batch\n", "Epoch: 14/20... Training Step: 6471... Training loss: 1.2917... 0.1200 sec/batch\n", "Epoch: 14/20... Training Step: 6472... Training loss: 1.1969... 0.1139 sec/batch\n", "Epoch: 14/20... Training Step: 6473... Training loss: 0.8943... 0.1211 sec/batch\n", "Epoch: 14/20... Training Step: 6474... Training loss: 1.0267... 0.1187 sec/batch\n", "Epoch: 14/20... Training Step: 6475... Training loss: 1.0499... 0.1169 sec/batch\n", "Epoch: 14/20... Training Step: 6476... Training loss: 1.0705... 0.1145 sec/batch\n", "Epoch: 14/20... Training Step: 6477... Training loss: 1.0031... 0.1149 sec/batch\n", "Epoch: 14/20... Training Step: 6478... Training loss: 1.0499... 0.1224 sec/batch\n", "Epoch: 14/20... Training Step: 6479... Training loss: 1.0875... 0.1177 sec/batch\n", "Epoch: 14/20... Training Step: 6480... Training loss: 0.8728... 0.1179 sec/batch\n", "Epoch: 14/20... Training Step: 6481... Training loss: 1.0427... 0.1173 sec/batch\n", "Epoch: 14/20... Training Step: 6482... Training loss: 0.9039... 0.1182 sec/batch\n", "Epoch: 14/20... Training Step: 6483... Training loss: 0.9467... 0.1210 sec/batch\n", "Epoch: 14/20... Training Step: 6484... Training loss: 0.9465... 0.1113 sec/batch\n", "Epoch: 14/20... Training Step: 6485... Training loss: 0.9940... 0.1174 sec/batch\n", "Epoch: 14/20... Training Step: 6486... Training loss: 1.2397... 0.1184 sec/batch\n", "Epoch: 14/20... Training Step: 6487... Training loss: 1.1351... 0.1148 sec/batch\n", "Epoch: 14/20... Training Step: 6488... Training loss: 0.9120... 0.1192 sec/batch\n", "Epoch: 14/20... Training Step: 6489... Training loss: 0.9764... 0.1204 sec/batch\n", "Epoch: 14/20... Training Step: 6490... Training loss: 0.8268... 0.1130 sec/batch\n", "Epoch: 14/20... Training Step: 6491... Training loss: 0.8747... 0.1220 sec/batch\n", "Epoch: 14/20... Training Step: 6492... Training loss: 1.0247... 0.1216 sec/batch\n", "Epoch: 14/20... Training Step: 6493... 
Training loss: 1.1252... 0.1148 sec/batch\n", "Epoch: 14/20... Training Step: 6494... Training loss: 0.9685... 0.1164 sec/batch\n", "Epoch: 14/20... Training Step: 6495... Training loss: 1.0748... 0.1154 sec/batch\n", "Epoch: 14/20... Training Step: 6496... Training loss: 1.0625... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6497... Training loss: 1.2148... 0.1206 sec/batch\n", "Epoch: 15/20... Training Step: 6498... Training loss: 1.0638... 0.1208 sec/batch\n", "Epoch: 15/20... Training Step: 6499... Training loss: 1.0443... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6500... Training loss: 1.1011... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6501... Training loss: 1.1199... 0.1159 sec/batch\n", "Epoch: 15/20... Training Step: 6502... Training loss: 0.9277... 0.1182 sec/batch\n", "Epoch: 15/20... Training Step: 6503... Training loss: 1.0633... 0.1217 sec/batch\n", "Epoch: 15/20... Training Step: 6504... Training loss: 0.9488... 0.1213 sec/batch\n", "Epoch: 15/20... Training Step: 6505... Training loss: 0.9093... 0.1149 sec/batch\n", "Epoch: 15/20... Training Step: 6506... Training loss: 1.0097... 0.1163 sec/batch\n", "Epoch: 15/20... Training Step: 6507... Training loss: 0.9992... 0.1151 sec/batch\n", "Epoch: 15/20... Training Step: 6508... Training loss: 0.9115... 0.1159 sec/batch\n", "Epoch: 15/20... Training Step: 6509... Training loss: 1.2983... 0.1181 sec/batch\n", "Epoch: 15/20... Training Step: 6510... Training loss: 0.7449... 0.1148 sec/batch\n", "Epoch: 15/20... Training Step: 6511... Training loss: 1.1020... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6512... Training loss: 1.0742... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6513... Training loss: 0.9355... 0.1171 sec/batch\n", "Epoch: 15/20... Training Step: 6514... Training loss: 0.9916... 0.1171 sec/batch\n", "Epoch: 15/20... Training Step: 6515... Training loss: 1.0735... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6516... 
Training loss: 0.8985... 0.1230 sec/batch\n", "Epoch: 15/20... Training Step: 6517... Training loss: 1.1651... 0.1159 sec/batch\n", "Epoch: 15/20... Training Step: 6518... Training loss: 0.8727... 0.1177 sec/batch\n", "Epoch: 15/20... Training Step: 6519... Training loss: 0.9992... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6520... Training loss: 0.9217... 0.1164 sec/batch\n", "Epoch: 15/20... Training Step: 6521... Training loss: 0.9802... 0.1205 sec/batch\n", "Epoch: 15/20... Training Step: 6522... Training loss: 1.0753... 0.1152 sec/batch\n", "Epoch: 15/20... Training Step: 6523... Training loss: 1.0089... 0.1140 sec/batch\n", "Epoch: 15/20... Training Step: 6524... Training loss: 0.9454... 0.1169 sec/batch\n", "Epoch: 15/20... Training Step: 6525... Training loss: 0.9372... 0.1179 sec/batch\n", "Epoch: 15/20... Training Step: 6526... Training loss: 1.0956... 0.1136 sec/batch\n", "Epoch: 15/20... Training Step: 6527... Training loss: 0.9237... 0.1130 sec/batch\n", "Epoch: 15/20... Training Step: 6528... Training loss: 0.9975... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6529... Training loss: 0.8425... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6530... Training loss: 0.9580... 0.1186 sec/batch\n", "Epoch: 15/20... Training Step: 6531... Training loss: 0.9338... 0.1235 sec/batch\n", "Epoch: 15/20... Training Step: 6532... Training loss: 1.0316... 0.1198 sec/batch\n", "Epoch: 15/20... Training Step: 6533... Training loss: 0.9630... 0.1183 sec/batch\n", "Epoch: 15/20... Training Step: 6534... Training loss: 0.9514... 0.1199 sec/batch\n", "Epoch: 15/20... Training Step: 6535... Training loss: 0.9280... 0.1181 sec/batch\n", "Epoch: 15/20... Training Step: 6536... Training loss: 1.1863... 0.1175 sec/batch\n", "Epoch: 15/20... Training Step: 6537... Training loss: 0.9145... 0.1201 sec/batch\n", "Epoch: 15/20... Training Step: 6538... Training loss: 0.9129... 0.1169 sec/batch\n", "Epoch: 15/20... Training Step: 6539... 
Training loss: 1.0702... 0.1158 sec/batch\n", "Epoch: 15/20... Training Step: 6540... Training loss: 0.8249... 0.1218 sec/batch\n", "Epoch: 15/20... Training Step: 6541... Training loss: 0.8341... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6542... Training loss: 0.9770... 0.1165 sec/batch\n", "Epoch: 15/20... Training Step: 6543... Training loss: 1.0112... 0.1215 sec/batch\n", "Epoch: 15/20... Training Step: 6544... Training loss: 1.0506... 0.1196 sec/batch\n", "Epoch: 15/20... Training Step: 6545... Training loss: 0.9560... 0.1153 sec/batch\n", "Epoch: 15/20... Training Step: 6546... Training loss: 0.9887... 0.1146 sec/batch\n", "Epoch: 15/20... Training Step: 6547... Training loss: 0.9952... 0.1158 sec/batch\n", "Epoch: 15/20... Training Step: 6548... Training loss: 0.9991... 0.1236 sec/batch\n", "Epoch: 15/20... Training Step: 6549... Training loss: 0.9890... 0.1178 sec/batch\n", "Epoch: 15/20... Training Step: 6550... Training loss: 1.0352... 0.1111 sec/batch\n", "Epoch: 15/20... Training Step: 6551... Training loss: 0.9910... 0.1156 sec/batch\n", "Epoch: 15/20... Training Step: 6552... Training loss: 0.9242... 0.1246 sec/batch\n", "Epoch: 15/20... Training Step: 6553... Training loss: 0.9414... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6554... Training loss: 0.9151... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6555... Training loss: 0.8432... 0.1144 sec/batch\n", "Epoch: 15/20... Training Step: 6556... Training loss: 0.9375... 0.1202 sec/batch\n", "Epoch: 15/20... Training Step: 6557... Training loss: 0.8462... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6558... Training loss: 0.9743... 0.1198 sec/batch\n", "Epoch: 15/20... Training Step: 6559... Training loss: 0.9561... 0.1175 sec/batch\n", "Epoch: 15/20... Training Step: 6560... Training loss: 1.0516... 0.1181 sec/batch\n", "Epoch: 15/20... Training Step: 6561... Training loss: 0.9668... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6562... 
Training loss: 1.0971... 0.1148 sec/batch\n", "Epoch: 15/20... Training Step: 6563... Training loss: 1.0778... 0.1134 sec/batch\n", "Epoch: 15/20... Training Step: 6564... Training loss: 1.0977... 0.1152 sec/batch\n", "Epoch: 15/20... Training Step: 6565... Training loss: 1.0055... 0.1208 sec/batch\n", "Epoch: 15/20... Training Step: 6566... Training loss: 0.9878... 0.1156 sec/batch\n", "Epoch: 15/20... Training Step: 6567... Training loss: 1.1049... 0.1180 sec/batch\n", "Epoch: 15/20... Training Step: 6568... Training loss: 0.9652... 0.1169 sec/batch\n", "Epoch: 15/20... Training Step: 6569... Training loss: 0.9011... 0.1182 sec/batch\n", "Epoch: 15/20... Training Step: 6570... Training loss: 0.8016... 0.1140 sec/batch\n", "Epoch: 15/20... Training Step: 6571... Training loss: 1.1508... 0.1152 sec/batch\n", "Epoch: 15/20... Training Step: 6572... Training loss: 0.9086... 0.1129 sec/batch\n", "Epoch: 15/20... Training Step: 6573... Training loss: 1.0246... 0.1154 sec/batch\n", "Epoch: 15/20... Training Step: 6574... Training loss: 1.0211... 0.1146 sec/batch\n", "Epoch: 15/20... Training Step: 6575... Training loss: 1.0818... 0.1121 sec/batch\n", "Epoch: 15/20... Training Step: 6576... Training loss: 0.9304... 0.1102 sec/batch\n", "Epoch: 15/20... Training Step: 6577... Training loss: 1.1453... 0.1127 sec/batch\n", "Epoch: 15/20... Training Step: 6578... Training loss: 0.9516... 0.1153 sec/batch\n", "Epoch: 15/20... Training Step: 6579... Training loss: 0.8698... 0.1107 sec/batch\n", "Epoch: 15/20... Training Step: 6580... Training loss: 1.0909... 0.1124 sec/batch\n", "Epoch: 15/20... Training Step: 6581... Training loss: 1.0760... 0.1122 sec/batch\n", "Epoch: 15/20... Training Step: 6582... Training loss: 1.1121... 0.1169 sec/batch\n", "Epoch: 15/20... Training Step: 6583... Training loss: 0.9409... 0.1201 sec/batch\n", "Epoch: 15/20... Training Step: 6584... Training loss: 1.0680... 0.1154 sec/batch\n", "Epoch: 15/20... Training Step: 6585... 
Training loss: 1.1619... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6586... Training loss: 1.0195... 0.1176 sec/batch\n", "Epoch: 15/20... Training Step: 6587... Training loss: 1.2174... 0.1113 sec/batch\n", "Epoch: 15/20... Training Step: 6588... Training loss: 1.1185... 0.1089 sec/batch\n", "Epoch: 15/20... Training Step: 6589... Training loss: 0.8110... 0.1148 sec/batch\n", "Epoch: 15/20... Training Step: 6590... Training loss: 1.1494... 0.1175 sec/batch\n", "Epoch: 15/20... Training Step: 6591... Training loss: 1.0392... 0.1164 sec/batch\n", "Epoch: 15/20... Training Step: 6592... Training loss: 1.0222... 0.1147 sec/batch\n", "Epoch: 15/20... Training Step: 6593... Training loss: 1.1374... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6594... Training loss: 1.0661... 0.1140 sec/batch\n", "Epoch: 15/20... Training Step: 6595... Training loss: 1.1600... 0.1134 sec/batch\n", "Epoch: 15/20... Training Step: 6596... Training loss: 1.0167... 0.1143 sec/batch\n", "Epoch: 15/20... Training Step: 6597... Training loss: 1.1114... 0.1136 sec/batch\n", "Epoch: 15/20... Training Step: 6598... Training loss: 1.3140... 0.1134 sec/batch\n", "Epoch: 15/20... Training Step: 6599... Training loss: 1.0463... 0.1171 sec/batch\n", "Epoch: 15/20... Training Step: 6600... Training loss: 1.0237... 0.1146 sec/batch\n", "Epoch: 15/20... Training Step: 6601... Training loss: 1.0832... 0.1190 sec/batch\n", "Epoch: 15/20... Training Step: 6602... Training loss: 1.0839... 0.1172 sec/batch\n", "Epoch: 15/20... Training Step: 6603... Training loss: 0.9039... 0.1202 sec/batch\n", "Epoch: 15/20... Training Step: 6604... Training loss: 1.0736... 0.1147 sec/batch\n", "Epoch: 15/20... Training Step: 6605... Training loss: 1.0353... 0.1202 sec/batch\n", "Epoch: 15/20... Training Step: 6606... Training loss: 1.0342... 0.1195 sec/batch\n", "Epoch: 15/20... Training Step: 6607... Training loss: 1.1910... 0.1172 sec/batch\n", "Epoch: 15/20... Training Step: 6608... 
Training loss: 0.8902... 0.1156 sec/batch\n", "Epoch: 15/20... Training Step: 6609... Training loss: 1.0578... 0.1184 sec/batch\n", "Epoch: 15/20... Training Step: 6610... Training loss: 1.1924... 0.1151 sec/batch\n", "Epoch: 15/20... Training Step: 6611... Training loss: 1.0410... 0.1239 sec/batch\n", "Epoch: 15/20... Training Step: 6612... Training loss: 0.9392... 0.1191 sec/batch\n", "Epoch: 15/20... Training Step: 6613... Training loss: 0.9452... 0.1148 sec/batch\n", "Epoch: 15/20... Training Step: 6614... Training loss: 1.0066... 0.1170 sec/batch\n", "Epoch: 15/20... Training Step: 6615... Training loss: 0.9634... 0.1251 sec/batch\n", "Epoch: 15/20... Training Step: 6616... Training loss: 0.8985... 0.1142 sec/batch\n", "Epoch: 15/20... Training Step: 6617... Training loss: 0.9929... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6618... Training loss: 1.0907... 0.1235 sec/batch\n", "Epoch: 15/20... Training Step: 6619... Training loss: 1.0533... 0.1207 sec/batch\n", "Epoch: 15/20... Training Step: 6620... Training loss: 0.9391... 0.1151 sec/batch\n", "Epoch: 15/20... Training Step: 6621... Training loss: 1.0379... 0.1170 sec/batch\n", "Epoch: 15/20... Training Step: 6622... Training loss: 0.9608... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6623... Training loss: 0.9554... 0.1183 sec/batch\n", "Epoch: 15/20... Training Step: 6624... Training loss: 1.0189... 0.1170 sec/batch\n", "Epoch: 15/20... Training Step: 6625... Training loss: 1.0278... 0.1165 sec/batch\n", "Epoch: 15/20... Training Step: 6626... Training loss: 1.0175... 0.1158 sec/batch\n", "Epoch: 15/20... Training Step: 6627... Training loss: 1.2254... 0.1222 sec/batch\n", "Epoch: 15/20... Training Step: 6628... Training loss: 0.9965... 0.1145 sec/batch\n", "Epoch: 15/20... Training Step: 6629... Training loss: 1.0442... 0.1191 sec/batch\n", "Epoch: 15/20... Training Step: 6630... Training loss: 1.1546... 0.1192 sec/batch\n", "Epoch: 15/20... Training Step: 6631... 
Training loss: 1.0361... 0.1145 sec/batch\n", "Epoch: 15/20... Training Step: 6632... Training loss: 0.9321... 0.1178 sec/batch\n", "Epoch: 15/20... Training Step: 6633... Training loss: 0.8642... 0.1221 sec/batch\n", "Epoch: 15/20... Training Step: 6634... Training loss: 0.9812... 0.1152 sec/batch\n", "Epoch: 15/20... Training Step: 6635... Training loss: 0.9977... 0.1196 sec/batch\n", "Epoch: 15/20... Training Step: 6636... Training loss: 1.0382... 0.1200 sec/batch\n", "Epoch: 15/20... Training Step: 6637... Training loss: 0.8041... 0.1149 sec/batch\n", "Epoch: 15/20... Training Step: 6638... Training loss: 0.8584... 0.1179 sec/batch\n", "Epoch: 15/20... Training Step: 6639... Training loss: 0.9440... 0.1164 sec/batch\n", "Epoch: 15/20... Training Step: 6640... Training loss: 0.9785... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6641... Training loss: 0.8768... 0.1252 sec/batch\n", "Epoch: 15/20... Training Step: 6642... Training loss: 0.9954... 0.1304 sec/batch\n", "Epoch: 15/20... Training Step: 6643... Training loss: 0.9437... 0.1194 sec/batch\n", "Epoch: 15/20... Training Step: 6644... Training loss: 0.8881... 0.1151 sec/batch\n", "Epoch: 15/20... Training Step: 6645... Training loss: 0.9013... 0.1204 sec/batch\n", "Epoch: 15/20... Training Step: 6646... Training loss: 1.0874... 0.1201 sec/batch\n", "Epoch: 15/20... Training Step: 6647... Training loss: 1.0120... 0.1110 sec/batch\n", "Epoch: 15/20... Training Step: 6648... Training loss: 1.0680... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6649... Training loss: 1.1810... 0.1182 sec/batch\n", "Epoch: 15/20... Training Step: 6650... Training loss: 1.0743... 0.1178 sec/batch\n", "Epoch: 15/20... Training Step: 6651... Training loss: 0.9031... 0.1190 sec/batch\n", "Epoch: 15/20... Training Step: 6652... Training loss: 0.9817... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6653... Training loss: 0.9548... 0.1198 sec/batch\n", "Epoch: 15/20... Training Step: 6654... 
Training loss: 0.8601... 0.1180 sec/batch\n", "Epoch: 15/20... Training Step: 6655... Training loss: 0.8315... 0.1180 sec/batch\n", "Epoch: 15/20... Training Step: 6656... Training loss: 0.9283... 0.1132 sec/batch\n", "Epoch: 15/20... Training Step: 6657... Training loss: 1.0954... 0.1176 sec/batch\n", "Epoch: 15/20... Training Step: 6658... Training loss: 1.0525... 0.1171 sec/batch\n", "Epoch: 15/20... Training Step: 6659... Training loss: 1.1597... 0.1145 sec/batch\n", "Epoch: 15/20... Training Step: 6660... Training loss: 0.9152... 0.1164 sec/batch\n", "Epoch: 15/20... Training Step: 6661... Training loss: 1.0588... 0.1186 sec/batch\n", "Epoch: 15/20... Training Step: 6662... Training loss: 0.8314... 0.1123 sec/batch\n", "Epoch: 15/20... Training Step: 6663... Training loss: 0.8971... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6664... Training loss: 1.2110... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6665... Training loss: 0.9778... 0.1183 sec/batch\n", "Epoch: 15/20... Training Step: 6666... Training loss: 1.0694... 0.1148 sec/batch\n", "Epoch: 15/20... Training Step: 6667... Training loss: 1.0552... 0.1172 sec/batch\n", "Epoch: 15/20... Training Step: 6668... Training loss: 1.0590... 0.1199 sec/batch\n", "Epoch: 15/20... Training Step: 6669... Training loss: 1.0338... 0.1272 sec/batch\n", "Epoch: 15/20... Training Step: 6670... Training loss: 0.9354... 0.1244 sec/batch\n", "Epoch: 15/20... Training Step: 6671... Training loss: 1.1733... 0.1223 sec/batch\n", "Epoch: 15/20... Training Step: 6672... Training loss: 0.8865... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6673... Training loss: 0.8780... 0.1154 sec/batch\n", "Epoch: 15/20... Training Step: 6674... Training loss: 1.0993... 0.1187 sec/batch\n", "Epoch: 15/20... Training Step: 6675... Training loss: 0.9595... 0.1170 sec/batch\n", "Epoch: 15/20... Training Step: 6676... Training loss: 1.0176... 0.1181 sec/batch\n", "Epoch: 15/20... Training Step: 6677... 
Training loss: 0.8136... 0.1137 sec/batch\n", "Epoch: 15/20... Training Step: 6678... Training loss: 1.2664... 0.1202 sec/batch\n", "Epoch: 15/20... Training Step: 6679... Training loss: 1.0579... 0.1176 sec/batch\n", "Epoch: 15/20... Training Step: 6680... Training loss: 1.0296... 0.1171 sec/batch\n", "Epoch: 15/20... Training Step: 6681... Training loss: 1.0904... 0.1174 sec/batch\n", "Epoch: 15/20... Training Step: 6682... Training loss: 1.0876... 0.1183 sec/batch\n", "Epoch: 15/20... Training Step: 6683... Training loss: 1.0146... 0.1192 sec/batch\n", "Epoch: 15/20... Training Step: 6684... Training loss: 0.8607... 0.1191 sec/batch\n", "Epoch: 15/20... Training Step: 6685... Training loss: 1.0775... 0.1245 sec/batch\n", "Epoch: 15/20... Training Step: 6686... Training loss: 0.8987... 0.1200 sec/batch\n", "Epoch: 15/20... Training Step: 6687... Training loss: 0.9457... 0.1196 sec/batch\n", "Epoch: 15/20... Training Step: 6688... Training loss: 1.0857... 0.1141 sec/batch\n", "Epoch: 15/20... Training Step: 6689... Training loss: 1.1209... 0.1242 sec/batch\n", "Epoch: 15/20... Training Step: 6690... Training loss: 1.1142... 0.1162 sec/batch\n", "Epoch: 15/20... Training Step: 6691... Training loss: 0.9697... 0.1224 sec/batch\n", "Epoch: 15/20... Training Step: 6692... Training loss: 1.0663... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6693... Training loss: 0.8911... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6694... Training loss: 1.1323... 0.1198 sec/batch\n", "Epoch: 15/20... Training Step: 6695... Training loss: 0.7624... 0.1211 sec/batch\n", "Epoch: 15/20... Training Step: 6696... Training loss: 0.9626... 0.1181 sec/batch\n", "Epoch: 15/20... Training Step: 6697... Training loss: 0.9555... 0.1191 sec/batch\n", "Epoch: 15/20... Training Step: 6698... Training loss: 1.1357... 0.1122 sec/batch\n", "Epoch: 15/20... Training Step: 6699... Training loss: 1.0132... 0.1100 sec/batch\n", "Epoch: 15/20... Training Step: 6700... 
Training loss: 1.0271... 0.1107 sec/batch\n", "Epoch: 15/20... Training Step: 6701... Training loss: 0.9760... 0.1131 sec/batch\n", "Epoch: 15/20... Training Step: 6702... Training loss: 0.9447... 0.1156 sec/batch\n", "Epoch: 15/20... Training Step: 6703... Training loss: 0.9878... 0.1120 sec/batch\n", "Epoch: 15/20... Training Step: 6704... Training loss: 0.9601... 0.1152 sec/batch\n", "Epoch: 15/20... Training Step: 6705... Training loss: 1.1364... 0.1095 sec/batch\n", "Epoch: 15/20... Training Step: 6706... Training loss: 0.8565... 0.1133 sec/batch\n", "Epoch: 15/20... Training Step: 6707... Training loss: 0.9928... 0.1231 sec/batch\n", "Epoch: 15/20... Training Step: 6708... Training loss: 1.1110... 0.1325 sec/batch\n", "Epoch: 15/20... Training Step: 6709... Training loss: 1.0820... 0.1195 sec/batch\n", "Epoch: 15/20... Training Step: 6710... Training loss: 0.9605... 0.1182 sec/batch\n", "Epoch: 15/20... Training Step: 6711... Training loss: 1.0650... 0.1212 sec/batch\n", "Epoch: 15/20... Training Step: 6712... Training loss: 1.0848... 0.1192 sec/batch\n", "Epoch: 15/20... Training Step: 6713... Training loss: 1.0196... 0.1211 sec/batch\n", "Epoch: 15/20... Training Step: 6714... Training loss: 1.0632... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6715... Training loss: 1.1063... 0.1151 sec/batch\n", "Epoch: 15/20... Training Step: 6716... Training loss: 1.0487... 0.1159 sec/batch\n", "Epoch: 15/20... Training Step: 6717... Training loss: 0.9490... 0.1202 sec/batch\n", "Epoch: 15/20... Training Step: 6718... Training loss: 1.2351... 0.1179 sec/batch\n", "Epoch: 15/20... Training Step: 6719... Training loss: 1.0429... 0.1191 sec/batch\n", "Epoch: 15/20... Training Step: 6720... Training loss: 1.2708... 0.1149 sec/batch\n", "Epoch: 15/20... Training Step: 6721... Training loss: 1.0779... 0.1169 sec/batch\n", "Epoch: 15/20... Training Step: 6722... Training loss: 1.1814... 0.1186 sec/batch\n", "Epoch: 15/20... Training Step: 6723... 
Training loss: 1.1660... 0.1204 sec/batch\n", "Epoch: 15/20... Training Step: 6724... Training loss: 0.9568... 0.1136 sec/batch\n", "Epoch: 15/20... Training Step: 6725... Training loss: 1.1080... 0.1153 sec/batch\n", "Epoch: 15/20... Training Step: 6726... Training loss: 0.9785... 0.1187 sec/batch\n", "Epoch: 15/20... Training Step: 6727... Training loss: 1.0305... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6728... Training loss: 0.9777... 0.1162 sec/batch\n", "Epoch: 15/20... Training Step: 6729... Training loss: 1.2124... 0.1211 sec/batch\n", "Epoch: 15/20... Training Step: 6730... Training loss: 1.0597... 0.1184 sec/batch\n", "Epoch: 15/20... Training Step: 6731... Training loss: 1.0805... 0.1212 sec/batch\n", "Epoch: 15/20... Training Step: 6732... Training loss: 1.0248... 0.1240 sec/batch\n", "Epoch: 15/20... Training Step: 6733... Training loss: 1.1033... 0.1253 sec/batch\n", "Epoch: 15/20... Training Step: 6734... Training loss: 0.9329... 0.1190 sec/batch\n", "Epoch: 15/20... Training Step: 6735... Training loss: 1.1220... 0.1153 sec/batch\n", "Epoch: 15/20... Training Step: 6736... Training loss: 1.0138... 0.1192 sec/batch\n", "Epoch: 15/20... Training Step: 6737... Training loss: 1.0124... 0.1303 sec/batch\n", "Epoch: 15/20... Training Step: 6738... Training loss: 0.9229... 0.1368 sec/batch\n", "Epoch: 15/20... Training Step: 6739... Training loss: 0.9560... 0.1272 sec/batch\n", "Epoch: 15/20... Training Step: 6740... Training loss: 1.0456... 0.1355 sec/batch\n", "Epoch: 15/20... Training Step: 6741... Training loss: 1.1132... 0.1237 sec/batch\n", "Epoch: 15/20... Training Step: 6742... Training loss: 0.9404... 0.1191 sec/batch\n", "Epoch: 15/20... Training Step: 6743... Training loss: 0.9950... 0.1194 sec/batch\n", "Epoch: 15/20... Training Step: 6744... Training loss: 1.1028... 0.1182 sec/batch\n", "Epoch: 15/20... Training Step: 6745... Training loss: 1.0576... 0.1169 sec/batch\n", "Epoch: 15/20... Training Step: 6746... 
Training loss: 1.0528... 0.1185 sec/batch\n", "Epoch: 15/20... Training Step: 6747... Training loss: 0.9890... 0.1237 sec/batch\n", "Epoch: 15/20... Training Step: 6748... Training loss: 1.0118... 0.1215 sec/batch\n", "Epoch: 15/20... Training Step: 6749... Training loss: 0.9943... 0.1222 sec/batch\n", "Epoch: 15/20... Training Step: 6750... Training loss: 0.9480... 0.1208 sec/batch\n", "Epoch: 15/20... Training Step: 6751... Training loss: 0.9692... 0.1583 sec/batch\n", "Epoch: 15/20... Training Step: 6752... Training loss: 1.1063... 0.1410 sec/batch\n", "Epoch: 15/20... Training Step: 6753... Training loss: 1.1836... 0.1335 sec/batch\n", "Epoch: 15/20... Training Step: 6754... Training loss: 0.9717... 0.1276 sec/batch\n", "Epoch: 15/20... Training Step: 6755... Training loss: 0.9445... 0.1215 sec/batch\n", "Epoch: 15/20... Training Step: 6756... Training loss: 0.9991... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6757... Training loss: 1.0592... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6758... Training loss: 1.0950... 0.1165 sec/batch\n", "Epoch: 15/20... Training Step: 6759... Training loss: 1.0532... 0.1131 sec/batch\n", "Epoch: 15/20... Training Step: 6760... Training loss: 1.0834... 0.1183 sec/batch\n", "Epoch: 15/20... Training Step: 6761... Training loss: 1.1485... 0.1175 sec/batch\n", "Epoch: 15/20... Training Step: 6762... Training loss: 0.9363... 0.1148 sec/batch\n", "Epoch: 15/20... Training Step: 6763... Training loss: 1.2381... 0.1177 sec/batch\n", "Epoch: 15/20... Training Step: 6764... Training loss: 1.1352... 0.1182 sec/batch\n", "Epoch: 15/20... Training Step: 6765... Training loss: 1.1141... 0.1156 sec/batch\n", "Epoch: 15/20... Training Step: 6766... Training loss: 1.1718... 0.1139 sec/batch\n", "Epoch: 15/20... Training Step: 6767... Training loss: 1.0655... 0.1174 sec/batch\n", "Epoch: 15/20... Training Step: 6768... Training loss: 1.1071... 0.1215 sec/batch\n", "Epoch: 15/20... Training Step: 6769... 
Training loss: 1.0768... 0.1222 sec/batch\n", "Epoch: 15/20... Training Step: 6770... Training loss: 1.1926... 0.1134 sec/batch\n", "Epoch: 15/20... Training Step: 6771... Training loss: 0.9967... 0.1162 sec/batch\n", "Epoch: 15/20... Training Step: 6772... Training loss: 1.0076... 0.1176 sec/batch\n", "Epoch: 15/20... Training Step: 6773... Training loss: 0.9750... 0.1161 sec/batch\n", "Epoch: 15/20... Training Step: 6774... Training loss: 1.2019... 0.1155 sec/batch\n", "Epoch: 15/20... Training Step: 6775... Training loss: 1.0330... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6776... Training loss: 0.9422... 0.1153 sec/batch\n", "Epoch: 15/20... Training Step: 6777... Training loss: 1.0327... 0.1168 sec/batch\n", "Epoch: 15/20... Training Step: 6778... Training loss: 1.0618... 0.1171 sec/batch\n", "Epoch: 15/20... Training Step: 6779... Training loss: 1.0575... 0.1200 sec/batch\n", "Epoch: 15/20... Training Step: 6780... Training loss: 1.0900... 0.1188 sec/batch\n", "Epoch: 15/20... Training Step: 6781... Training loss: 0.9455... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6782... Training loss: 0.9305... 0.1188 sec/batch\n", "Epoch: 15/20... Training Step: 6783... Training loss: 1.0326... 0.1155 sec/batch\n", "Epoch: 15/20... Training Step: 6784... Training loss: 1.1005... 0.1188 sec/batch\n", "Epoch: 15/20... Training Step: 6785... Training loss: 1.0623... 0.1164 sec/batch\n", "Epoch: 15/20... Training Step: 6786... Training loss: 1.0778... 0.1152 sec/batch\n", "Epoch: 15/20... Training Step: 6787... Training loss: 1.0997... 0.1164 sec/batch\n", "Epoch: 15/20... Training Step: 6788... Training loss: 0.9744... 0.1216 sec/batch\n", "Epoch: 15/20... Training Step: 6789... Training loss: 0.9911... 0.1179 sec/batch\n", "Epoch: 15/20... Training Step: 6790... Training loss: 1.0803... 0.1136 sec/batch\n", "Epoch: 15/20... Training Step: 6791... Training loss: 1.1085... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6792... 
Training loss: 1.1659... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6793... Training loss: 1.0423... 0.1192 sec/batch\n", "Epoch: 15/20... Training Step: 6794... Training loss: 0.8949... 0.1190 sec/batch\n", "Epoch: 15/20... Training Step: 6795... Training loss: 1.0118... 0.1185 sec/batch\n", "Epoch: 15/20... Training Step: 6796... Training loss: 1.0223... 0.1123 sec/batch\n", "Epoch: 15/20... Training Step: 6797... Training loss: 1.0608... 0.1212 sec/batch\n", "Epoch: 15/20... Training Step: 6798... Training loss: 0.9903... 0.1134 sec/batch\n", "Epoch: 15/20... Training Step: 6799... Training loss: 0.9151... 0.1214 sec/batch\n", "Epoch: 15/20... Training Step: 6800... Training loss: 1.2882... 0.1181 sec/batch\n", "Epoch: 15/20... Training Step: 6801... Training loss: 1.0631... 0.1164 sec/batch\n", "Epoch: 15/20... Training Step: 6802... Training loss: 1.1026... 0.1124 sec/batch\n", "Epoch: 15/20... Training Step: 6803... Training loss: 1.0593... 0.1196 sec/batch\n", "Epoch: 15/20... Training Step: 6804... Training loss: 1.1869... 0.1163 sec/batch\n", "Epoch: 15/20... Training Step: 6805... Training loss: 1.2459... 0.1145 sec/batch\n", "Epoch: 15/20... Training Step: 6806... Training loss: 1.0611... 0.1186 sec/batch\n", "Epoch: 15/20... Training Step: 6807... Training loss: 0.9280... 0.1183 sec/batch\n", "Epoch: 15/20... Training Step: 6808... Training loss: 0.9916... 0.1137 sec/batch\n", "Epoch: 15/20... Training Step: 6809... Training loss: 0.9397... 0.1216 sec/batch\n", "Epoch: 15/20... Training Step: 6810... Training loss: 0.9364... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6811... Training loss: 0.9037... 0.1159 sec/batch\n", "Epoch: 15/20... Training Step: 6812... Training loss: 0.8956... 0.1139 sec/batch\n", "Epoch: 15/20... Training Step: 6813... Training loss: 0.9080... 0.1127 sec/batch\n", "Epoch: 15/20... Training Step: 6814... Training loss: 1.1136... 0.1161 sec/batch\n", "Epoch: 15/20... Training Step: 6815... 
Training loss: 0.9171... 0.1168 sec/batch\n", "Epoch: 15/20... Training Step: 6816... Training loss: 0.9328... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6817... Training loss: 0.8656... 0.1158 sec/batch\n", "Epoch: 15/20... Training Step: 6818... Training loss: 1.1470... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6819... Training loss: 0.9779... 0.1159 sec/batch\n", "Epoch: 15/20... Training Step: 6820... Training loss: 0.8542... 0.1222 sec/batch\n", "Epoch: 15/20... Training Step: 6821... Training loss: 0.8958... 0.1196 sec/batch\n", "Epoch: 15/20... Training Step: 6822... Training loss: 0.8009... 0.1178 sec/batch\n", "Epoch: 15/20... Training Step: 6823... Training loss: 0.9194... 0.1201 sec/batch\n", "Epoch: 15/20... Training Step: 6824... Training loss: 1.0190... 0.1190 sec/batch\n", "Epoch: 15/20... Training Step: 6825... Training loss: 1.1042... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6826... Training loss: 0.9625... 0.1175 sec/batch\n", "Epoch: 15/20... Training Step: 6827... Training loss: 1.0845... 0.1160 sec/batch\n", "Epoch: 15/20... Training Step: 6828... Training loss: 1.1032... 0.1146 sec/batch\n", "Epoch: 15/20... Training Step: 6829... Training loss: 0.8736... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6830... Training loss: 1.1096... 0.1170 sec/batch\n", "Epoch: 15/20... Training Step: 6831... Training loss: 1.0335... 0.1154 sec/batch\n", "Epoch: 15/20... Training Step: 6832... Training loss: 0.9511... 0.1183 sec/batch\n", "Epoch: 15/20... Training Step: 6833... Training loss: 0.9516... 0.1207 sec/batch\n", "Epoch: 15/20... Training Step: 6834... Training loss: 0.9326... 0.1184 sec/batch\n", "Epoch: 15/20... Training Step: 6835... Training loss: 0.9433... 0.1204 sec/batch\n", "Epoch: 15/20... Training Step: 6836... Training loss: 0.9690... 0.1131 sec/batch\n", "Epoch: 15/20... Training Step: 6837... Training loss: 1.0162... 0.1133 sec/batch\n", "Epoch: 15/20... Training Step: 6838... 
Training loss: 1.1990... 0.1160 sec/batch\n", "Epoch: 15/20... Training Step: 6839... Training loss: 0.9595... 0.1194 sec/batch\n", "Epoch: 15/20... Training Step: 6840... Training loss: 1.1413... 0.1268 sec/batch\n", "Epoch: 15/20... Training Step: 6841... Training loss: 0.9334... 0.1149 sec/batch\n", "Epoch: 15/20... Training Step: 6842... Training loss: 0.9415... 0.1235 sec/batch\n", "Epoch: 15/20... Training Step: 6843... Training loss: 0.8384... 0.1480 sec/batch\n", "Epoch: 15/20... Training Step: 6844... Training loss: 1.2064... 0.1430 sec/batch\n", "Epoch: 15/20... Training Step: 6845... Training loss: 0.9263... 0.1186 sec/batch\n", "Epoch: 15/20... Training Step: 6846... Training loss: 0.9278... 0.1267 sec/batch\n", "Epoch: 15/20... Training Step: 6847... Training loss: 1.0316... 0.1188 sec/batch\n", "Epoch: 15/20... Training Step: 6848... Training loss: 0.9625... 0.1255 sec/batch\n", "Epoch: 15/20... Training Step: 6849... Training loss: 0.8615... 0.1191 sec/batch\n", "Epoch: 15/20... Training Step: 6850... Training loss: 0.7743... 0.1157 sec/batch\n", "Epoch: 15/20... Training Step: 6851... Training loss: 1.0212... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6852... Training loss: 0.9841... 0.1204 sec/batch\n", "Epoch: 15/20... Training Step: 6853... Training loss: 0.9253... 0.1162 sec/batch\n", "Epoch: 15/20... Training Step: 6854... Training loss: 1.1074... 0.1199 sec/batch\n", "Epoch: 15/20... Training Step: 6855... Training loss: 1.2505... 0.1165 sec/batch\n", "Epoch: 15/20... Training Step: 6856... Training loss: 0.8603... 0.1213 sec/batch\n", "Epoch: 15/20... Training Step: 6857... Training loss: 1.0941... 0.1202 sec/batch\n", "Epoch: 15/20... Training Step: 6858... Training loss: 1.0111... 0.1193 sec/batch\n", "Epoch: 15/20... Training Step: 6859... Training loss: 0.9568... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6860... Training loss: 1.0315... 0.1149 sec/batch\n", "Epoch: 15/20... Training Step: 6861... 
Training loss: 1.0891... 0.1138 sec/batch\n", "Epoch: 15/20... Training Step: 6862... Training loss: 1.0784... 0.1163 sec/batch\n", "Epoch: 15/20... Training Step: 6863... Training loss: 0.9990... 0.1273 sec/batch\n", "Epoch: 15/20... Training Step: 6864... Training loss: 1.1366... 0.1161 sec/batch\n", "Epoch: 15/20... Training Step: 6865... Training loss: 1.0290... 0.1181 sec/batch\n", "Epoch: 15/20... Training Step: 6866... Training loss: 0.9312... 0.1164 sec/batch\n", "Epoch: 15/20... Training Step: 6867... Training loss: 0.7874... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6868... Training loss: 1.0315... 0.1153 sec/batch\n", "Epoch: 15/20... Training Step: 6869... Training loss: 0.9255... 0.1146 sec/batch\n", "Epoch: 15/20... Training Step: 6870... Training loss: 1.2053... 0.1178 sec/batch\n", "Epoch: 15/20... Training Step: 6871... Training loss: 0.9280... 0.1223 sec/batch\n", "Epoch: 15/20... Training Step: 6872... Training loss: 1.0346... 0.1202 sec/batch\n", "Epoch: 15/20... Training Step: 6873... Training loss: 0.9944... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6874... Training loss: 1.1464... 0.1219 sec/batch\n", "Epoch: 15/20... Training Step: 6875... Training loss: 1.0061... 0.1174 sec/batch\n", "Epoch: 15/20... Training Step: 6876... Training loss: 1.0994... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6877... Training loss: 0.9471... 0.1185 sec/batch\n", "Epoch: 15/20... Training Step: 6878... Training loss: 0.9179... 0.1190 sec/batch\n", "Epoch: 15/20... Training Step: 6879... Training loss: 0.9368... 0.1170 sec/batch\n", "Epoch: 15/20... Training Step: 6880... Training loss: 1.1119... 0.1174 sec/batch\n", "Epoch: 15/20... Training Step: 6881... Training loss: 0.8844... 0.1207 sec/batch\n", "Epoch: 15/20... Training Step: 6882... Training loss: 0.9586... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6883... Training loss: 1.1055... 0.1151 sec/batch\n", "Epoch: 15/20... Training Step: 6884... 
Training loss: 0.8867... 0.1205 sec/batch\n", "Epoch: 15/20... Training Step: 6885... Training loss: 0.8877... 0.1206 sec/batch\n", "Epoch: 15/20... Training Step: 6886... Training loss: 1.0813... 0.1163 sec/batch\n", "Epoch: 15/20... Training Step: 6887... Training loss: 0.9972... 0.1192 sec/batch\n", "Epoch: 15/20... Training Step: 6888... Training loss: 0.9264... 0.1161 sec/batch\n", "Epoch: 15/20... Training Step: 6889... Training loss: 1.0818... 0.1170 sec/batch\n", "Epoch: 15/20... Training Step: 6890... Training loss: 0.9540... 0.1162 sec/batch\n", "Epoch: 15/20... Training Step: 6891... Training loss: 1.0140... 0.1184 sec/batch\n", "Epoch: 15/20... Training Step: 6892... Training loss: 1.0507... 0.1175 sec/batch\n", "Epoch: 15/20... Training Step: 6893... Training loss: 0.9124... 0.1180 sec/batch\n", "Epoch: 15/20... Training Step: 6894... Training loss: 1.0473... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6895... Training loss: 1.0077... 0.1173 sec/batch\n", "Epoch: 15/20... Training Step: 6896... Training loss: 1.1014... 0.1151 sec/batch\n", "Epoch: 15/20... Training Step: 6897... Training loss: 0.9444... 0.1212 sec/batch\n", "Epoch: 15/20... Training Step: 6898... Training loss: 0.9800... 0.1191 sec/batch\n", "Epoch: 15/20... Training Step: 6899... Training loss: 0.8963... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6900... Training loss: 1.0347... 0.1145 sec/batch\n", "Epoch: 15/20... Training Step: 6901... Training loss: 1.0744... 0.1185 sec/batch\n", "Epoch: 15/20... Training Step: 6902... Training loss: 1.0411... 0.1167 sec/batch\n", "Epoch: 15/20... Training Step: 6903... Training loss: 1.0946... 0.1175 sec/batch\n", "Epoch: 15/20... Training Step: 6904... Training loss: 1.2304... 0.1195 sec/batch\n", "Epoch: 15/20... Training Step: 6905... Training loss: 0.8963... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6906... Training loss: 0.9680... 0.1162 sec/batch\n", "Epoch: 15/20... Training Step: 6907... 
Training loss: 0.9032... 0.1202 sec/batch\n", "Epoch: 15/20... Training Step: 6908... Training loss: 0.9238... 0.1150 sec/batch\n", "Epoch: 15/20... Training Step: 6909... Training loss: 1.0511... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6910... Training loss: 0.8669... 0.1237 sec/batch\n", "Epoch: 15/20... Training Step: 6911... Training loss: 1.0622... 0.1187 sec/batch\n", "Epoch: 15/20... Training Step: 6912... Training loss: 1.0791... 0.1245 sec/batch\n", "Epoch: 15/20... Training Step: 6913... Training loss: 1.0049... 0.1250 sec/batch\n", "Epoch: 15/20... Training Step: 6914... Training loss: 0.9165... 0.1222 sec/batch\n", "Epoch: 15/20... Training Step: 6915... Training loss: 0.8702... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6916... Training loss: 0.8922... 0.1174 sec/batch\n", "Epoch: 15/20... Training Step: 6917... Training loss: 1.0856... 0.1242 sec/batch\n", "Epoch: 15/20... Training Step: 6918... Training loss: 0.9481... 0.1285 sec/batch\n", "Epoch: 15/20... Training Step: 6919... Training loss: 1.0237... 0.1251 sec/batch\n", "Epoch: 15/20... Training Step: 6920... Training loss: 1.0620... 0.1282 sec/batch\n", "Epoch: 15/20... Training Step: 6921... Training loss: 0.9703... 0.1183 sec/batch\n", "Epoch: 15/20... Training Step: 6922... Training loss: 1.0614... 0.1185 sec/batch\n", "Epoch: 15/20... Training Step: 6923... Training loss: 0.8612... 0.1201 sec/batch\n", "Epoch: 15/20... Training Step: 6924... Training loss: 0.8586... 0.1189 sec/batch\n", "Epoch: 15/20... Training Step: 6925... Training loss: 0.8879... 0.1214 sec/batch\n", "Epoch: 15/20... Training Step: 6926... Training loss: 0.9492... 0.1180 sec/batch\n", "Epoch: 15/20... Training Step: 6927... Training loss: 1.1726... 0.1204 sec/batch\n", "Epoch: 15/20... Training Step: 6928... Training loss: 0.9658... 0.1220 sec/batch\n", "Epoch: 15/20... Training Step: 6929... Training loss: 1.1220... 0.1199 sec/batch\n", "Epoch: 15/20... Training Step: 6930... 
Training loss: 1.0549... 0.1245 sec/batch\n", "Epoch: 15/20... Training Step: 6931... Training loss: 0.9249... 0.1147 sec/batch\n", "Epoch: 15/20... Training Step: 6932... Training loss: 1.0239... 0.1171 sec/batch\n", "Epoch: 15/20... Training Step: 6933... Training loss: 0.8686... 0.1160 sec/batch\n", "Epoch: 15/20... Training Step: 6934... Training loss: 1.1294... 0.1155 sec/batch\n", "Epoch: 15/20... Training Step: 6935... Training loss: 1.1428... 0.1221 sec/batch\n", "Epoch: 15/20... Training Step: 6936... Training loss: 1.1718... 0.1160 sec/batch\n", "Epoch: 15/20... Training Step: 6937... Training loss: 0.8317... 0.1192 sec/batch\n", "Epoch: 15/20... Training Step: 6938... Training loss: 1.0967... 0.1169 sec/batch\n", "Epoch: 15/20... Training Step: 6939... Training loss: 0.9797... 0.1203 sec/batch\n", "Epoch: 15/20... Training Step: 6940... Training loss: 0.9744... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6941... Training loss: 1.0083... 0.1235 sec/batch\n", "Epoch: 15/20... Training Step: 6942... Training loss: 1.0474... 0.1170 sec/batch\n", "Epoch: 15/20... Training Step: 6943... Training loss: 0.9513... 0.1165 sec/batch\n", "Epoch: 15/20... Training Step: 6944... Training loss: 0.8746... 0.1196 sec/batch\n", "Epoch: 15/20... Training Step: 6945... Training loss: 0.9480... 0.1179 sec/batch\n", "Epoch: 15/20... Training Step: 6946... Training loss: 1.0609... 0.1215 sec/batch\n", "Epoch: 15/20... Training Step: 6947... Training loss: 0.8445... 0.1174 sec/batch\n", "Epoch: 15/20... Training Step: 6948... Training loss: 0.9921... 0.1178 sec/batch\n", "Epoch: 15/20... Training Step: 6949... Training loss: 0.9529... 0.1197 sec/batch\n", "Epoch: 15/20... Training Step: 6950... Training loss: 1.2175... 0.1244 sec/batch\n", "Epoch: 15/20... Training Step: 6951... Training loss: 1.0624... 0.1286 sec/batch\n", "Epoch: 15/20... Training Step: 6952... Training loss: 0.8996... 0.1319 sec/batch\n", "Epoch: 15/20... Training Step: 6953... 
Training loss: 0.8964... 0.1185 sec/batch\n", "Epoch: 15/20... Training Step: 6954... Training loss: 0.8401... 0.1239 sec/batch\n", "Epoch: 15/20... Training Step: 6955... Training loss: 0.8732... 0.1262 sec/batch\n", "Epoch: 15/20... Training Step: 6956... Training loss: 0.9830... 0.1166 sec/batch\n", "Epoch: 15/20... Training Step: 6957... Training loss: 0.9378... 0.1210 sec/batch\n", "Epoch: 15/20... Training Step: 6958... Training loss: 1.0186... 0.1177 sec/batch\n", "Epoch: 15/20... Training Step: 6959... Training loss: 0.9887... 0.1175 sec/batch\n", "Epoch: 15/20... Training Step: 6960... Training loss: 1.0287... 0.1169 sec/batch\n", "Epoch: 16/20... Training Step: 6961... Training loss: 1.1333... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 6962... Training loss: 1.0790... 0.1171 sec/batch\n", "Epoch: 16/20... Training Step: 6963... Training loss: 1.1068... 0.1228 sec/batch\n", "Epoch: 16/20... Training Step: 6964... Training loss: 1.1257... 0.1194 sec/batch\n", "Epoch: 16/20... Training Step: 6965... Training loss: 1.0779... 0.1212 sec/batch\n", "Epoch: 16/20... Training Step: 6966... Training loss: 0.9662... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 6967... Training loss: 1.0658... 0.1223 sec/batch\n", "Epoch: 16/20... Training Step: 6968... Training loss: 0.9632... 0.1154 sec/batch\n", "Epoch: 16/20... Training Step: 6969... Training loss: 0.7743... 0.1211 sec/batch\n", "Epoch: 16/20... Training Step: 6970... Training loss: 1.0108... 0.1197 sec/batch\n", "Epoch: 16/20... Training Step: 6971... Training loss: 0.9431... 0.1210 sec/batch\n", "Epoch: 16/20... Training Step: 6972... Training loss: 0.9104... 0.1216 sec/batch\n", "Epoch: 16/20... Training Step: 6973... Training loss: 1.1822... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 6974... Training loss: 0.7536... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 6975... Training loss: 1.0279... 0.1155 sec/batch\n", "Epoch: 16/20... Training Step: 6976... 
Training loss: 1.0946... 0.1171 sec/batch\n", "Epoch: 16/20... Training Step: 6977... Training loss: 0.9426... 0.1193 sec/batch\n", "Epoch: 16/20... Training Step: 6978... Training loss: 1.0575... 0.1138 sec/batch\n", "Epoch: 16/20... Training Step: 6979... Training loss: 1.0523... 0.1186 sec/batch\n", "Epoch: 16/20... Training Step: 6980... Training loss: 0.9840... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 6981... Training loss: 0.9742... 0.1186 sec/batch\n", "Epoch: 16/20... Training Step: 6982... Training loss: 0.9730... 0.1229 sec/batch\n", "Epoch: 16/20... Training Step: 6983... Training loss: 1.1550... 0.1198 sec/batch\n", "Epoch: 16/20... Training Step: 6984... Training loss: 0.8731... 0.1166 sec/batch\n", "Epoch: 16/20... Training Step: 6985... Training loss: 0.9552... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 6986... Training loss: 0.9489... 0.1202 sec/batch\n", "Epoch: 16/20... Training Step: 6987... Training loss: 1.1260... 0.1149 sec/batch\n", "Epoch: 16/20... Training Step: 6988... Training loss: 0.9653... 0.1175 sec/batch\n", "Epoch: 16/20... Training Step: 6989... Training loss: 0.9294... 0.1159 sec/batch\n", "Epoch: 16/20... Training Step: 6990... Training loss: 1.0076... 0.1172 sec/batch\n", "Epoch: 16/20... Training Step: 6991... Training loss: 0.9812... 0.1167 sec/batch\n", "Epoch: 16/20... Training Step: 6992... Training loss: 0.8331... 0.1224 sec/batch\n", "Epoch: 16/20... Training Step: 6993... Training loss: 0.9562... 0.1190 sec/batch\n", "Epoch: 16/20... Training Step: 6994... Training loss: 0.9095... 0.1189 sec/batch\n", "Epoch: 16/20... Training Step: 6995... Training loss: 0.8529... 0.1183 sec/batch\n", "Epoch: 16/20... Training Step: 6996... Training loss: 0.8459... 0.1180 sec/batch\n", "Epoch: 16/20... Training Step: 6997... Training loss: 0.9255... 0.1183 sec/batch\n", "Epoch: 16/20... Training Step: 6998... Training loss: 0.8733... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 6999... 
Training loss: 0.8411... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7000... Training loss: 1.2394... 0.1137 sec/batch\n", "Epoch: 16/20... Training Step: 7001... Training loss: 0.9696... 0.1195 sec/batch\n", "Epoch: 16/20... Training Step: 7002... Training loss: 0.9215... 0.1175 sec/batch\n", "Epoch: 16/20... Training Step: 7003... Training loss: 1.0692... 0.1204 sec/batch\n", "Epoch: 16/20... Training Step: 7004... Training loss: 0.8447... 0.1126 sec/batch\n", "Epoch: 16/20... Training Step: 7005... Training loss: 0.9486... 0.1207 sec/batch\n", "Epoch: 16/20... Training Step: 7006... Training loss: 0.8775... 0.1159 sec/batch\n", "Epoch: 16/20... Training Step: 7007... Training loss: 0.9330... 0.1152 sec/batch\n", "Epoch: 16/20... Training Step: 7008... Training loss: 1.0170... 0.1156 sec/batch\n", "Epoch: 16/20... Training Step: 7009... Training loss: 0.8771... 0.1181 sec/batch\n", "Epoch: 16/20... Training Step: 7010... Training loss: 0.9803... 0.1192 sec/batch\n", "Epoch: 16/20... Training Step: 7011... Training loss: 0.9992... 0.1208 sec/batch\n", "Epoch: 16/20... Training Step: 7012... Training loss: 0.9405... 0.1144 sec/batch\n", "Epoch: 16/20... Training Step: 7013... Training loss: 0.9195... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7014... Training loss: 0.9362... 0.1134 sec/batch\n", "Epoch: 16/20... Training Step: 7015... Training loss: 0.8977... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 7016... Training loss: 0.9771... 0.1142 sec/batch\n", "Epoch: 16/20... Training Step: 7017... Training loss: 0.9191... 0.1199 sec/batch\n", "Epoch: 16/20... Training Step: 7018... Training loss: 1.0372... 0.1164 sec/batch\n", "Epoch: 16/20... Training Step: 7019... Training loss: 0.8353... 0.1191 sec/batch\n", "Epoch: 16/20... Training Step: 7020... Training loss: 0.8787... 0.1177 sec/batch\n", "Epoch: 16/20... Training Step: 7021... Training loss: 0.9051... 0.1150 sec/batch\n", "Epoch: 16/20... Training Step: 7022... 
Training loss: 1.0401... 0.1161 sec/batch\n", "Epoch: 16/20... Training Step: 7023... Training loss: 0.9227... 0.1165 sec/batch\n", "Epoch: 16/20... Training Step: 7024... Training loss: 0.9573... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 7025... Training loss: 0.9478... 0.1159 sec/batch\n", "Epoch: 16/20... Training Step: 7026... Training loss: 1.1726... 0.1155 sec/batch\n", "Epoch: 16/20... Training Step: 7027... Training loss: 0.9931... 0.1180 sec/batch\n", "Epoch: 16/20... Training Step: 7028... Training loss: 0.9751... 0.1182 sec/batch\n", "Epoch: 16/20... Training Step: 7029... Training loss: 0.9094... 0.1135 sec/batch\n", "Epoch: 16/20... Training Step: 7030... Training loss: 0.9701... 0.1223 sec/batch\n", "Epoch: 16/20... Training Step: 7031... Training loss: 1.0832... 0.1175 sec/batch\n", "Epoch: 16/20... Training Step: 7032... Training loss: 0.9279... 0.1173 sec/batch\n", "Epoch: 16/20... Training Step: 7033... Training loss: 0.9121... 0.1177 sec/batch\n", "Epoch: 16/20... Training Step: 7034... Training loss: 0.8121... 0.1197 sec/batch\n", "Epoch: 16/20... Training Step: 7035... Training loss: 1.1808... 0.1157 sec/batch\n", "Epoch: 16/20... Training Step: 7036... Training loss: 0.8829... 0.1135 sec/batch\n", "Epoch: 16/20... Training Step: 7037... Training loss: 0.9465... 0.1123 sec/batch\n", "Epoch: 16/20... Training Step: 7038... Training loss: 0.9284... 0.1222 sec/batch\n", "Epoch: 16/20... Training Step: 7039... Training loss: 1.0453... 0.1182 sec/batch\n", "Epoch: 16/20... Training Step: 7040... Training loss: 0.8917... 0.1209 sec/batch\n", "Epoch: 16/20... Training Step: 7041... Training loss: 1.2090... 0.1170 sec/batch\n", "Epoch: 16/20... Training Step: 7042... Training loss: 0.9012... 0.1149 sec/batch\n", "Epoch: 16/20... Training Step: 7043... Training loss: 0.8297... 0.1192 sec/batch\n", "Epoch: 16/20... Training Step: 7044... Training loss: 1.0218... 0.1236 sec/batch\n", "Epoch: 16/20... Training Step: 7045... 
Training loss: 0.9748... 0.1294 sec/batch\n", "Epoch: 16/20... Training Step: 7046... Training loss: 1.1056... 0.1172 sec/batch\n", "Epoch: 16/20... Training Step: 7047... Training loss: 0.9351... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7048... Training loss: 1.1500... 0.1165 sec/batch\n", "Epoch: 16/20... Training Step: 7049... Training loss: 1.0241... 0.1148 sec/batch\n", "Epoch: 16/20... Training Step: 7050... Training loss: 0.9663... 0.1124 sec/batch\n", "Epoch: 16/20... Training Step: 7051... Training loss: 1.0666... 0.1171 sec/batch\n", "Epoch: 16/20... Training Step: 7052... Training loss: 1.1449... 0.1119 sec/batch\n", "Epoch: 16/20... Training Step: 7053... Training loss: 0.8723... 0.1130 sec/batch\n", "Epoch: 16/20... Training Step: 7054... Training loss: 1.2180... 0.1191 sec/batch\n", "Epoch: 16/20... Training Step: 7055... Training loss: 1.0186... 0.1196 sec/batch\n", "Epoch: 16/20... Training Step: 7056... Training loss: 0.9323... 0.1184 sec/batch\n", "Epoch: 16/20... Training Step: 7057... Training loss: 1.1748... 0.1187 sec/batch\n", "Epoch: 16/20... Training Step: 7058... Training loss: 1.0486... 0.1222 sec/batch\n", "Epoch: 16/20... Training Step: 7059... Training loss: 1.1499... 0.1130 sec/batch\n", "Epoch: 16/20... Training Step: 7060... Training loss: 1.0585... 0.1169 sec/batch\n", "Epoch: 16/20... Training Step: 7061... Training loss: 1.0631... 0.1194 sec/batch\n", "Epoch: 16/20... Training Step: 7062... Training loss: 1.1187... 0.1189 sec/batch\n", "Epoch: 16/20... Training Step: 7063... Training loss: 1.1889... 0.1177 sec/batch\n", "Epoch: 16/20... Training Step: 7064... Training loss: 1.0728... 0.1149 sec/batch\n", "Epoch: 16/20... Training Step: 7065... Training loss: 1.0737... 0.1190 sec/batch\n", "Epoch: 16/20... Training Step: 7066... Training loss: 1.1936... 0.1165 sec/batch\n", "Epoch: 16/20... Training Step: 7067... Training loss: 0.9580... 0.1197 sec/batch\n", "Epoch: 16/20... Training Step: 7068... 
Training loss: 1.0927... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7069... Training loss: 1.0151... 0.1181 sec/batch\n", "Epoch: 16/20... Training Step: 7070... Training loss: 1.0413... 0.1151 sec/batch\n", "Epoch: 16/20... Training Step: 7071... Training loss: 1.0231... 0.1181 sec/batch\n", "Epoch: 16/20... Training Step: 7072... Training loss: 0.8874... 0.1139 sec/batch\n", "Epoch: 16/20... Training Step: 7073... Training loss: 1.0379... 0.1177 sec/batch\n", "Epoch: 16/20... Training Step: 7074... Training loss: 1.0661... 0.1184 sec/batch\n", "Epoch: 16/20... Training Step: 7075... Training loss: 0.9258... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7076... Training loss: 0.8496... 0.1199 sec/batch\n", "Epoch: 16/20... Training Step: 7077... Training loss: 0.9543... 0.1189 sec/batch\n", "Epoch: 16/20... Training Step: 7078... Training loss: 1.0415... 0.1176 sec/batch\n", "Epoch: 16/20... Training Step: 7079... Training loss: 1.0289... 0.1179 sec/batch\n", "Epoch: 16/20... Training Step: 7080... Training loss: 0.9020... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7081... Training loss: 1.0283... 0.1190 sec/batch\n", "Epoch: 16/20... Training Step: 7082... Training loss: 1.0413... 0.1182 sec/batch\n", "Epoch: 16/20... Training Step: 7083... Training loss: 1.0489... 0.1181 sec/batch\n", "Epoch: 16/20... Training Step: 7084... Training loss: 1.0675... 0.1193 sec/batch\n", "Epoch: 16/20... Training Step: 7085... Training loss: 1.0815... 0.1158 sec/batch\n", "Epoch: 16/20... Training Step: 7086... Training loss: 0.9366... 0.1155 sec/batch\n", "Epoch: 16/20... Training Step: 7087... Training loss: 1.0213... 0.1197 sec/batch\n", "Epoch: 16/20... Training Step: 7088... Training loss: 1.0737... 0.1172 sec/batch\n", "Epoch: 16/20... Training Step: 7089... Training loss: 1.0044... 0.1200 sec/batch\n", "Epoch: 16/20... Training Step: 7090... Training loss: 0.9933... 0.1159 sec/batch\n", "Epoch: 16/20... Training Step: 7091... 
Training loss: 1.2071... 0.1170 sec/batch\n", "Epoch: 16/20... Training Step: 7092... Training loss: 0.9217... 0.1159 sec/batch\n", "Epoch: 16/20... Training Step: 7093... Training loss: 0.9370... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7094... Training loss: 1.0313... 0.1228 sec/batch\n", "Epoch: 16/20... Training Step: 7095... Training loss: 0.9422... 0.1143 sec/batch\n", "Epoch: 16/20... Training Step: 7096... Training loss: 0.9349... 0.1196 sec/batch\n", "Epoch: 16/20... Training Step: 7097... Training loss: 0.8812... 0.1171 sec/batch\n", "Epoch: 16/20... Training Step: 7098... Training loss: 0.9308... 0.1225 sec/batch\n", "Epoch: 16/20... Training Step: 7099... Training loss: 0.9520... 0.1340 sec/batch\n", "Epoch: 16/20... Training Step: 7100... Training loss: 0.9668... 0.1261 sec/batch\n", "Epoch: 16/20... Training Step: 7101... Training loss: 0.7827... 0.1275 sec/batch\n", "Epoch: 16/20... Training Step: 7102... Training loss: 0.9841... 0.1224 sec/batch\n", "Epoch: 16/20... Training Step: 7103... Training loss: 0.8243... 0.1286 sec/batch\n", "Epoch: 16/20... Training Step: 7104... Training loss: 1.0372... 0.1219 sec/batch\n", "Epoch: 16/20... Training Step: 7105... Training loss: 0.9583... 0.1272 sec/batch\n", "Epoch: 16/20... Training Step: 7106... Training loss: 0.9429... 0.1237 sec/batch\n", "Epoch: 16/20... Training Step: 7107... Training loss: 1.0338... 0.1239 sec/batch\n", "Epoch: 16/20... Training Step: 7108... Training loss: 0.9952... 0.1244 sec/batch\n", "Epoch: 16/20... Training Step: 7109... Training loss: 0.8094... 0.1233 sec/batch\n", "Epoch: 16/20... Training Step: 7110... Training loss: 0.9154... 0.1228 sec/batch\n", "Epoch: 16/20... Training Step: 7111... Training loss: 0.9617... 0.1239 sec/batch\n", "Epoch: 16/20... Training Step: 7112... Training loss: 1.0545... 0.1246 sec/batch\n", "Epoch: 16/20... Training Step: 7113... Training loss: 1.0915... 0.1236 sec/batch\n", "Epoch: 16/20... Training Step: 7114... 
Training loss: 0.9728... 0.1235 sec/batch\n", "Epoch: 16/20... Training Step: 7115... Training loss: 0.9007... 0.1263 sec/batch\n", "Epoch: 16/20... Training Step: 7116... Training loss: 0.8959... 0.1202 sec/batch\n", "Epoch: 16/20... Training Step: 7117... Training loss: 1.0533... 0.1254 sec/batch\n", "Epoch: 16/20... Training Step: 7118... Training loss: 0.9299... 0.1194 sec/batch\n", "Epoch: 16/20... Training Step: 7119... Training loss: 0.9442... 0.1229 sec/batch\n", "Epoch: 16/20... Training Step: 7120... Training loss: 0.8549... 0.1228 sec/batch\n", "Epoch: 16/20... Training Step: 7121... Training loss: 0.9460... 0.1273 sec/batch\n", "Epoch: 16/20... Training Step: 7122... Training loss: 1.0103... 0.1211 sec/batch\n", "Epoch: 16/20... Training Step: 7123... Training loss: 1.3024... 0.1205 sec/batch\n", "Epoch: 16/20... Training Step: 7124... Training loss: 0.9533... 0.1197 sec/batch\n", "Epoch: 16/20... Training Step: 7125... Training loss: 0.9239... 0.1252 sec/batch\n", "Epoch: 16/20... Training Step: 7126... Training loss: 0.8062... 0.1191 sec/batch\n", "Epoch: 16/20... Training Step: 7127... Training loss: 0.9346... 0.1290 sec/batch\n", "Epoch: 16/20... Training Step: 7128... Training loss: 1.0651... 0.1245 sec/batch\n", "Epoch: 16/20... Training Step: 7129... Training loss: 0.9690... 0.1180 sec/batch\n", "Epoch: 16/20... Training Step: 7130... Training loss: 0.9410... 0.1169 sec/batch\n", "Epoch: 16/20... Training Step: 7131... Training loss: 1.0013... 0.1214 sec/batch\n", "Epoch: 16/20... Training Step: 7132... Training loss: 1.1654... 0.1216 sec/batch\n", "Epoch: 16/20... Training Step: 7133... Training loss: 0.9002... 0.1200 sec/batch\n", "Epoch: 16/20... Training Step: 7134... Training loss: 0.8454... 0.1288 sec/batch\n", "Epoch: 16/20... Training Step: 7135... Training loss: 1.1436... 0.1191 sec/batch\n", "Epoch: 16/20... Training Step: 7136... Training loss: 0.8330... 0.1236 sec/batch\n", "Epoch: 16/20... Training Step: 7137... 
Training loss: 0.8549... 0.1247 sec/batch\n", "Epoch: 16/20... Training Step: 7138... Training loss: 1.0686... 0.1226 sec/batch\n", "Epoch: 16/20... Training Step: 7139... Training loss: 0.8185... 0.1226 sec/batch\n", "Epoch: 16/20... Training Step: 7140... Training loss: 0.9772... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 7141... Training loss: 0.8435... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 7142... Training loss: 1.1695... 0.1162 sec/batch\n", "Epoch: 16/20... Training Step: 7143... Training loss: 1.1011... 0.1248 sec/batch\n", "Epoch: 16/20... Training Step: 7144... Training loss: 0.9327... 0.1221 sec/batch\n", "Epoch: 16/20... Training Step: 7145... Training loss: 1.0942... 0.1189 sec/batch\n", "Epoch: 16/20... Training Step: 7146... Training loss: 1.0482... 0.1267 sec/batch\n", "Epoch: 16/20... Training Step: 7147... Training loss: 1.1672... 0.1255 sec/batch\n", "Epoch: 16/20... Training Step: 7148... Training loss: 0.9443... 0.1216 sec/batch\n", "Epoch: 16/20... Training Step: 7149... Training loss: 1.1316... 0.1198 sec/batch\n", "Epoch: 16/20... Training Step: 7150... Training loss: 1.0162... 0.1199 sec/batch\n", "Epoch: 16/20... Training Step: 7151... Training loss: 0.8867... 0.1246 sec/batch\n", "Epoch: 16/20... Training Step: 7152... Training loss: 0.9985... 0.1201 sec/batch\n", "Epoch: 16/20... Training Step: 7153... Training loss: 0.9455... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7154... Training loss: 0.9223... 0.1189 sec/batch\n", "Epoch: 16/20... Training Step: 7155... Training loss: 0.9996... 0.1212 sec/batch\n", "Epoch: 16/20... Training Step: 7156... Training loss: 1.0858... 0.1293 sec/batch\n", "Epoch: 16/20... Training Step: 7157... Training loss: 0.9614... 0.1246 sec/batch\n", "Epoch: 16/20... Training Step: 7158... Training loss: 1.0139... 0.1215 sec/batch\n", "Epoch: 16/20... Training Step: 7159... Training loss: 0.7818... 0.1306 sec/batch\n", "Epoch: 16/20... Training Step: 7160... 
Training loss: 0.8248... 0.1190 sec/batch\n", "Epoch: 16/20... Training Step: 7161... Training loss: 0.8844... 0.1212 sec/batch\n", "Epoch: 16/20... Training Step: 7162... Training loss: 0.9583... 0.1274 sec/batch\n", "Epoch: 16/20... Training Step: 7163... Training loss: 0.9800... 0.1221 sec/batch\n", "Epoch: 16/20... Training Step: 7164... Training loss: 1.2071... 0.1216 sec/batch\n", "Epoch: 16/20... Training Step: 7165... Training loss: 0.8434... 0.1235 sec/batch\n", "Epoch: 16/20... Training Step: 7166... Training loss: 0.9767... 0.1222 sec/batch\n", "Epoch: 16/20... Training Step: 7167... Training loss: 1.0140... 0.1266 sec/batch\n", "Epoch: 16/20... Training Step: 7168... Training loss: 0.9341... 0.1307 sec/batch\n", "Epoch: 16/20... Training Step: 7169... Training loss: 1.0838... 0.1258 sec/batch\n", "Epoch: 16/20... Training Step: 7170... Training loss: 0.9163... 0.1237 sec/batch\n", "Epoch: 16/20... Training Step: 7171... Training loss: 0.9695... 0.1241 sec/batch\n", "Epoch: 16/20... Training Step: 7172... Training loss: 1.1038... 0.1277 sec/batch\n", "Epoch: 16/20... Training Step: 7173... Training loss: 0.9695... 0.1253 sec/batch\n", "Epoch: 16/20... Training Step: 7174... Training loss: 0.9735... 0.1232 sec/batch\n", "Epoch: 16/20... Training Step: 7175... Training loss: 1.0980... 0.1265 sec/batch\n", "Epoch: 16/20... Training Step: 7176... Training loss: 0.9503... 0.1235 sec/batch\n", "Epoch: 16/20... Training Step: 7177... Training loss: 1.0635... 0.1182 sec/batch\n", "Epoch: 16/20... Training Step: 7178... Training loss: 1.1084... 0.1182 sec/batch\n", "Epoch: 16/20... Training Step: 7179... Training loss: 1.1657... 0.1194 sec/batch\n", "Epoch: 16/20... Training Step: 7180... Training loss: 1.0264... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 7181... Training loss: 0.8898... 0.1172 sec/batch\n", "Epoch: 16/20... Training Step: 7182... Training loss: 1.2179... 0.1160 sec/batch\n", "Epoch: 16/20... Training Step: 7183... 
Training loss: 0.9448... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 7184... Training loss: 1.2049... 0.1175 sec/batch\n", "Epoch: 16/20... Training Step: 7185... Training loss: 1.0032... 0.1196 sec/batch\n", "Epoch: 16/20... Training Step: 7186... Training loss: 1.0960... 0.1190 sec/batch\n", "Epoch: 16/20... Training Step: 7187... Training loss: 1.1483... 0.1336 sec/batch\n", "Epoch: 16/20... Training Step: 7188... Training loss: 1.0206... 0.1303 sec/batch\n", "Epoch: 16/20... Training Step: 7189... Training loss: 1.1042... 0.1249 sec/batch\n", "Epoch: 16/20... Training Step: 7190... Training loss: 0.9747... 0.1164 sec/batch\n", "Epoch: 16/20... Training Step: 7191... Training loss: 1.0922... 0.1162 sec/batch\n", "Epoch: 16/20... Training Step: 7192... Training loss: 0.9913... 0.1140 sec/batch\n", "Epoch: 16/20... Training Step: 7193... Training loss: 1.2074... 0.1168 sec/batch\n", "Epoch: 16/20... Training Step: 7194... Training loss: 1.1012... 0.1170 sec/batch\n", "Epoch: 16/20... Training Step: 7195... Training loss: 1.1730... 0.1150 sec/batch\n", "Epoch: 16/20... Training Step: 7196... Training loss: 0.9284... 0.1156 sec/batch\n", "Epoch: 16/20... Training Step: 7197... Training loss: 1.1750... 0.1141 sec/batch\n", "Epoch: 16/20... Training Step: 7198... Training loss: 0.8527... 0.1202 sec/batch\n", "Epoch: 16/20... Training Step: 7199... Training loss: 1.0429... 0.1179 sec/batch\n", "Epoch: 16/20... Training Step: 7200... Training loss: 1.0536... 0.1156 sec/batch\n", "Epoch: 16/20... Training Step: 7201... Training loss: 0.9911... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7202... Training loss: 1.0055... 0.1168 sec/batch\n", "Epoch: 16/20... Training Step: 7203... Training loss: 1.1098... 0.1146 sec/batch\n", "Epoch: 16/20... Training Step: 7204... Training loss: 0.9554... 0.1138 sec/batch\n", "Epoch: 16/20... Training Step: 7205... Training loss: 0.9940... 0.1171 sec/batch\n", "Epoch: 16/20... Training Step: 7206... 
Training loss: 0.9573... 0.1244 sec/batch\n", "Epoch: 16/20... Training Step: 7207... Training loss: 0.9461... 0.1246 sec/batch\n", "Epoch: 16/20... Training Step: 7208... Training loss: 1.1863... 0.1207 sec/batch\n", "Epoch: 16/20... Training Step: 7209... Training loss: 0.9815... 0.1241 sec/batch\n", "Epoch: 16/20... Training Step: 7210... Training loss: 1.0252... 0.1136 sec/batch\n", "Epoch: 16/20... Training Step: 7211... Training loss: 1.1695... 0.1219 sec/batch\n", "Epoch: 16/20... Training Step: 7212... Training loss: 0.9241... 0.1195 sec/batch\n", "Epoch: 16/20... Training Step: 7213... Training loss: 0.9655... 0.1180 sec/batch\n", "Epoch: 16/20... Training Step: 7214... Training loss: 1.0326... 0.1193 sec/batch\n", "Epoch: 16/20... Training Step: 7215... Training loss: 0.9465... 0.1136 sec/batch\n", "Epoch: 16/20... Training Step: 7216... Training loss: 0.9759... 0.1158 sec/batch\n", "Epoch: 16/20... Training Step: 7217... Training loss: 1.1148... 0.1198 sec/batch\n", "Epoch: 16/20... Training Step: 7218... Training loss: 0.9243... 0.1431 sec/batch\n", "Epoch: 16/20... Training Step: 7219... Training loss: 0.8535... 0.1384 sec/batch\n", "Epoch: 16/20... Training Step: 7220... Training loss: 0.9929... 0.1255 sec/batch\n", "Epoch: 16/20... Training Step: 7221... Training loss: 1.0345... 0.1350 sec/batch\n", "Epoch: 16/20... Training Step: 7222... Training loss: 1.0050... 0.1346 sec/batch\n", "Epoch: 16/20... Training Step: 7223... Training loss: 1.0223... 0.1266 sec/batch\n", "Epoch: 16/20... Training Step: 7224... Training loss: 1.0488... 0.1184 sec/batch\n", "Epoch: 16/20... Training Step: 7225... Training loss: 1.1503... 0.1234 sec/batch\n", "Epoch: 16/20... Training Step: 7226... Training loss: 0.9873... 0.1238 sec/batch\n", "Epoch: 16/20... Training Step: 7227... Training loss: 1.2493... 0.1235 sec/batch\n", "Epoch: 16/20... Training Step: 7228... Training loss: 1.0929... 0.1216 sec/batch\n", "Epoch: 16/20... Training Step: 7229... 
Training loss: 1.0527... 0.1214 sec/batch\n", "Epoch: 16/20... Training Step: 7230... Training loss: 1.2109... 0.1150 sec/batch\n", "Epoch: 16/20... Training Step: 7231... Training loss: 0.9655... 0.1213 sec/batch\n", "Epoch: 16/20... Training Step: 7232... Training loss: 1.0385... 0.1204 sec/batch\n", "Epoch: 16/20... Training Step: 7233... Training loss: 1.0869... 0.1164 sec/batch\n", "Epoch: 16/20... Training Step: 7234... Training loss: 1.0442... 0.1170 sec/batch\n", "Epoch: 16/20... Training Step: 7235... Training loss: 1.1196... 0.1183 sec/batch\n", "Epoch: 16/20... Training Step: 7236... Training loss: 0.9849... 0.1207 sec/batch\n", "Epoch: 16/20... Training Step: 7237... Training loss: 0.8516... 0.1195 sec/batch\n", "Epoch: 16/20... Training Step: 7238... Training loss: 1.1631... 0.1162 sec/batch\n", "Epoch: 16/20... Training Step: 7239... Training loss: 1.0645... 0.1198 sec/batch\n", "Epoch: 16/20... Training Step: 7240... Training loss: 0.9901... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7241... Training loss: 0.9352... 0.1184 sec/batch\n", "Epoch: 16/20... Training Step: 7242... Training loss: 1.0522... 0.1174 sec/batch\n", "Epoch: 16/20... Training Step: 7243... Training loss: 0.9754... 0.1189 sec/batch\n", "Epoch: 16/20... Training Step: 7244... Training loss: 1.0874... 0.1175 sec/batch\n", "Epoch: 16/20... Training Step: 7245... Training loss: 0.9472... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 7246... Training loss: 0.9805... 0.1208 sec/batch\n", "Epoch: 16/20... Training Step: 7247... Training loss: 1.0668... 0.1168 sec/batch\n", "Epoch: 16/20... Training Step: 7248... Training loss: 1.0769... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7249... Training loss: 1.1234... 0.1195 sec/batch\n", "Epoch: 16/20... Training Step: 7250... Training loss: 1.1183... 0.1167 sec/batch\n", "Epoch: 16/20... Training Step: 7251... Training loss: 1.0462... 0.1186 sec/batch\n", "Epoch: 16/20... Training Step: 7252... 
Training loss: 0.9488... 0.1147 sec/batch\n", "Epoch: 16/20... Training Step: 7253... Training loss: 0.9421... 0.1156 sec/batch\n", "Epoch: 16/20... Training Step: 7254... Training loss: 1.0472... 0.1205 sec/batch\n", "Epoch: 16/20... Training Step: 7255... Training loss: 1.0568... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7256... Training loss: 1.2393... 0.1143 sec/batch\n", "Epoch: 16/20... Training Step: 7257... Training loss: 1.0126... 0.1160 sec/batch\n", "Epoch: 16/20... Training Step: 7258... Training loss: 0.9098... 0.1285 sec/batch\n", "Epoch: 16/20... Training Step: 7259... Training loss: 1.0679... 0.1116 sec/batch\n", "Epoch: 16/20... Training Step: 7260... Training loss: 1.0590... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7261... Training loss: 1.0165... 0.1168 sec/batch\n", "Epoch: 16/20... Training Step: 7262... Training loss: 1.0519... 0.1198 sec/batch\n", "Epoch: 16/20... Training Step: 7263... Training loss: 0.8759... 0.1173 sec/batch\n", "Epoch: 16/20... Training Step: 7264... Training loss: 1.2913... 0.1201 sec/batch\n", "Epoch: 16/20... Training Step: 7265... Training loss: 1.0032... 0.1148 sec/batch\n", "Epoch: 16/20... Training Step: 7266... Training loss: 1.2126... 0.1164 sec/batch\n", "Epoch: 16/20... Training Step: 7267... Training loss: 0.9678... 0.1215 sec/batch\n", "Epoch: 16/20... Training Step: 7268... Training loss: 1.2948... 0.1144 sec/batch\n", "Epoch: 16/20... Training Step: 7269... Training loss: 1.1026... 0.1156 sec/batch\n", "Epoch: 16/20... Training Step: 7270... Training loss: 0.9990... 0.1161 sec/batch\n", "Epoch: 16/20... Training Step: 7271... Training loss: 1.0190... 0.1204 sec/batch\n", "Epoch: 16/20... Training Step: 7272... Training loss: 0.8508... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7273... Training loss: 0.7859... 0.1172 sec/batch\n", "Epoch: 16/20... Training Step: 7274... Training loss: 0.8772... 0.1159 sec/batch\n", "Epoch: 16/20... Training Step: 7275... 
Training loss: 0.9595... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 7276... Training loss: 0.9077... 0.1224 sec/batch\n", "Epoch: 16/20... Training Step: 7277... Training loss: 0.8664... 0.1187 sec/batch\n", "Epoch: 16/20... Training Step: 7278... Training loss: 0.9955... 0.1236 sec/batch\n", "Epoch: 16/20... Training Step: 7279... Training loss: 0.7814... 0.1257 sec/batch\n", "Epoch: 16/20... Training Step: 7280... Training loss: 0.8483... 0.1331 sec/batch\n", "Epoch: 16/20... Training Step: 7281... Training loss: 0.8619... 0.1257 sec/batch\n", "Epoch: 16/20... Training Step: 7282... Training loss: 1.1923... 0.1252 sec/batch\n", "Epoch: 16/20... Training Step: 7283... Training loss: 0.8776... 0.1277 sec/batch\n", "Epoch: 16/20... Training Step: 7284... Training loss: 0.9697... 0.1180 sec/batch\n", "Epoch: 16/20... Training Step: 7285... Training loss: 0.9111... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7286... Training loss: 0.8335... 0.1148 sec/batch\n", "Epoch: 16/20... Training Step: 7287... Training loss: 0.8900... 0.1181 sec/batch\n", "Epoch: 16/20... Training Step: 7288... Training loss: 0.9816... 0.1202 sec/batch\n", "Epoch: 16/20... Training Step: 7289... Training loss: 1.1606... 0.1162 sec/batch\n", "Epoch: 16/20... Training Step: 7290... Training loss: 0.9877... 0.1129 sec/batch\n", "Epoch: 16/20... Training Step: 7291... Training loss: 1.1471... 0.1214 sec/batch\n", "Epoch: 16/20... Training Step: 7292... Training loss: 1.0699... 0.1196 sec/batch\n", "Epoch: 16/20... Training Step: 7293... Training loss: 0.9670... 0.1168 sec/batch\n", "Epoch: 16/20... Training Step: 7294... Training loss: 0.9260... 0.1187 sec/batch\n", "Epoch: 16/20... Training Step: 7295... Training loss: 0.9859... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 7296... Training loss: 0.9927... 0.1200 sec/batch\n", "Epoch: 16/20... Training Step: 7297... Training loss: 0.8566... 0.1190 sec/batch\n", "Epoch: 16/20... Training Step: 7298... 
Training loss: 0.8312... 0.1145 sec/batch\n", "Epoch: 16/20... Training Step: 7299... Training loss: 1.0638... 0.1186 sec/batch\n", "Epoch: 16/20... Training Step: 7300... Training loss: 0.9062... 0.1160 sec/batch\n", "Epoch: 16/20... Training Step: 7301... Training loss: 0.9216... 0.1201 sec/batch\n", "Epoch: 16/20... Training Step: 7302... Training loss: 1.0963... 0.1174 sec/batch\n", "Epoch: 16/20... Training Step: 7303... Training loss: 0.8319... 0.1171 sec/batch\n", "Epoch: 16/20... Training Step: 7304... Training loss: 1.0693... 0.1137 sec/batch\n", "Epoch: 16/20... Training Step: 7305... Training loss: 0.9685... 0.1235 sec/batch\n", "Epoch: 16/20... Training Step: 7306... Training loss: 0.8661... 0.1169 sec/batch\n", "Epoch: 16/20... Training Step: 7307... Training loss: 0.8549... 0.1161 sec/batch\n", "Epoch: 16/20... Training Step: 7308... Training loss: 1.0942... 0.1177 sec/batch\n", "Epoch: 16/20... Training Step: 7309... Training loss: 1.0093... 0.1197 sec/batch\n", "Epoch: 16/20... Training Step: 7310... Training loss: 0.9761... 0.1193 sec/batch\n", "Epoch: 16/20... Training Step: 7311... Training loss: 0.8739... 0.1138 sec/batch\n", "Epoch: 16/20... Training Step: 7312... Training loss: 0.9419... 0.1201 sec/batch\n", "Epoch: 16/20... Training Step: 7313... Training loss: 0.8492... 0.1232 sec/batch\n", "Epoch: 16/20... Training Step: 7314... Training loss: 0.7005... 0.1172 sec/batch\n", "Epoch: 16/20... Training Step: 7315... Training loss: 1.0550... 0.1221 sec/batch\n", "Epoch: 16/20... Training Step: 7316... Training loss: 0.9364... 0.1202 sec/batch\n", "Epoch: 16/20... Training Step: 7317... Training loss: 0.8346... 0.1207 sec/batch\n", "Epoch: 16/20... Training Step: 7318... Training loss: 1.0312... 0.1216 sec/batch\n", "Epoch: 16/20... Training Step: 7319... Training loss: 1.1805... 0.1203 sec/batch\n", "Epoch: 16/20... Training Step: 7320... Training loss: 0.7913... 0.1230 sec/batch\n", "Epoch: 16/20... Training Step: 7321... 
Training loss: 1.0203... 0.1175 sec/batch\n", "Epoch: 16/20... Training Step: 7322... Training loss: 1.0855... 0.1163 sec/batch\n", "Epoch: 16/20... Training Step: 7323... Training loss: 0.8778... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 7324... Training loss: 0.9792... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 7325... Training loss: 0.9376... 0.1221 sec/batch\n", "Epoch: 16/20... Training Step: 7326... Training loss: 0.9029... 0.1177 sec/batch\n", "Epoch: 16/20... Training Step: 7327... Training loss: 1.0093... 0.1202 sec/batch\n", "Epoch: 16/20... Training Step: 7328... Training loss: 1.0112... 0.1192 sec/batch\n", "Epoch: 16/20... Training Step: 7329... Training loss: 1.0603... 0.1174 sec/batch\n", "Epoch: 16/20... Training Step: 7330... Training loss: 1.0764... 0.1163 sec/batch\n", "Epoch: 16/20... Training Step: 7331... Training loss: 0.7596... 0.1187 sec/batch\n", "Epoch: 16/20... Training Step: 7332... Training loss: 0.9766... 0.1137 sec/batch\n", "Epoch: 16/20... Training Step: 7333... Training loss: 0.9030... 0.1182 sec/batch\n", "Epoch: 16/20... Training Step: 7334... Training loss: 0.9787... 0.1228 sec/batch\n", "Epoch: 16/20... Training Step: 7335... Training loss: 0.9047... 0.1183 sec/batch\n", "Epoch: 16/20... Training Step: 7336... Training loss: 0.9618... 0.1157 sec/batch\n", "Epoch: 16/20... Training Step: 7337... Training loss: 1.0378... 0.1187 sec/batch\n", "Epoch: 16/20... Training Step: 7338... Training loss: 1.0733... 0.1198 sec/batch\n", "Epoch: 16/20... Training Step: 7339... Training loss: 0.9802... 0.1160 sec/batch\n", "Epoch: 16/20... Training Step: 7340... Training loss: 1.0639... 0.1176 sec/batch\n", "Epoch: 16/20... Training Step: 7341... Training loss: 0.9540... 0.1196 sec/batch\n", "Epoch: 16/20... Training Step: 7342... Training loss: 0.9072... 0.1201 sec/batch\n", "Epoch: 16/20... Training Step: 7343... Training loss: 0.9127... 0.1219 sec/batch\n", "Epoch: 16/20... Training Step: 7344... 
Training loss: 1.0027... 0.1198 sec/batch\n", "Epoch: 16/20... Training Step: 7345... Training loss: 0.8694... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7346... Training loss: 1.0554... 0.1222 sec/batch\n", "Epoch: 16/20... Training Step: 7347... Training loss: 1.0891... 0.1141 sec/batch\n", "Epoch: 16/20... Training Step: 7348... Training loss: 0.8809... 0.1161 sec/batch\n", "Epoch: 16/20... Training Step: 7349... Training loss: 0.8647... 0.1237 sec/batch\n", "Epoch: 16/20... Training Step: 7350... Training loss: 0.9802... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 7351... Training loss: 0.9163... 0.1159 sec/batch\n", "Epoch: 16/20... Training Step: 7352... Training loss: 0.8882... 0.1183 sec/batch\n", "Epoch: 16/20... Training Step: 7353... Training loss: 0.9980... 0.1202 sec/batch\n", "Epoch: 16/20... Training Step: 7354... Training loss: 0.8858... 0.1231 sec/batch\n", "Epoch: 16/20... Training Step: 7355... Training loss: 0.9462... 0.1160 sec/batch\n", "Epoch: 16/20... Training Step: 7356... Training loss: 1.1218... 0.1218 sec/batch\n", "Epoch: 16/20... Training Step: 7357... Training loss: 0.9369... 0.1194 sec/batch\n", "Epoch: 16/20... Training Step: 7358... Training loss: 1.0149... 0.1193 sec/batch\n", "Epoch: 16/20... Training Step: 7359... Training loss: 1.1552... 0.1158 sec/batch\n", "Epoch: 16/20... Training Step: 7360... Training loss: 1.0324... 0.1197 sec/batch\n", "Epoch: 16/20... Training Step: 7361... Training loss: 0.8413... 0.1164 sec/batch\n", "Epoch: 16/20... Training Step: 7362... Training loss: 0.9688... 0.1191 sec/batch\n", "Epoch: 16/20... Training Step: 7363... Training loss: 1.0105... 0.1179 sec/batch\n", "Epoch: 16/20... Training Step: 7364... Training loss: 1.0749... 0.1178 sec/batch\n", "Epoch: 16/20... Training Step: 7365... Training loss: 0.8610... 0.1163 sec/batch\n", "Epoch: 16/20... Training Step: 7366... Training loss: 1.1139... 0.1218 sec/batch\n", "Epoch: 16/20... Training Step: 7367... 
Training loss: 0.9920... 0.1181 sec/batch\n", "Epoch: 16/20... Training Step: 7368... Training loss: 1.1791... 0.1149 sec/batch\n", "Epoch: 16/20... Training Step: 7369... Training loss: 1.0172... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 7370... Training loss: 0.9526... 0.1171 sec/batch\n", "Epoch: 16/20... Training Step: 7371... Training loss: 0.8940... 0.1164 sec/batch\n", "Epoch: 16/20... Training Step: 7372... Training loss: 0.9337... 0.1171 sec/batch\n", "Epoch: 16/20... Training Step: 7373... Training loss: 1.0722... 0.1147 sec/batch\n", "Epoch: 16/20... Training Step: 7374... Training loss: 0.8702... 0.1182 sec/batch\n", "Epoch: 16/20... Training Step: 7375... Training loss: 1.0802... 0.1170 sec/batch\n", "Epoch: 16/20... Training Step: 7376... Training loss: 1.1055... 0.1230 sec/batch\n", "Epoch: 16/20... Training Step: 7377... Training loss: 1.0352... 0.1189 sec/batch\n", "Epoch: 16/20... Training Step: 7378... Training loss: 0.8461... 0.1175 sec/batch\n", "Epoch: 16/20... Training Step: 7379... Training loss: 0.8477... 0.1220 sec/batch\n", "Epoch: 16/20... Training Step: 7380... Training loss: 0.8587... 0.1188 sec/batch\n", "Epoch: 16/20... Training Step: 7381... Training loss: 1.0856... 0.1155 sec/batch\n", "Epoch: 16/20... Training Step: 7382... Training loss: 0.9543... 0.1185 sec/batch\n", "Epoch: 16/20... Training Step: 7383... Training loss: 0.9332... 0.1181 sec/batch\n", "Epoch: 16/20... Training Step: 7384... Training loss: 0.9917... 0.1140 sec/batch\n", "Epoch: 16/20... Training Step: 7385... Training loss: 0.8651... 0.1204 sec/batch\n", "Epoch: 16/20... Training Step: 7386... Training loss: 0.9978... 0.1184 sec/batch\n", "Epoch: 16/20... Training Step: 7387... Training loss: 0.9609... 0.1213 sec/batch\n", "Epoch: 16/20... Training Step: 7388... Training loss: 0.9847... 0.1156 sec/batch\n", "Epoch: 16/20... Training Step: 7389... Training loss: 0.8744... 0.1150 sec/batch\n", "Epoch: 16/20... Training Step: 7390... 
Training loss: 0.9034... 0.1195 sec/batch\n", "Epoch: 16/20... Training Step: 7391... Training loss: 1.0747... 0.1202 sec/batch\n", "Epoch: 16/20... Training Step: 7392... Training loss: 0.9331... 0.1174 sec/batch\n", "Epoch: 16/20... Training Step: 7393... Training loss: 1.1351... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 7394... Training loss: 1.1514... 0.1191 sec/batch\n", "Epoch: 16/20... Training Step: 7395... Training loss: 0.9472... 0.1164 sec/batch\n", "Epoch: 16/20... Training Step: 7396... Training loss: 1.0009... 0.1190 sec/batch\n", "Epoch: 16/20... Training Step: 7397... Training loss: 0.8792... 0.1190 sec/batch\n", "Epoch: 16/20... Training Step: 7398... Training loss: 1.0517... 0.1180 sec/batch\n", "Epoch: 16/20... Training Step: 7399... Training loss: 1.1396... 0.1165 sec/batch\n", "Epoch: 16/20... Training Step: 7400... Training loss: 1.0700... 0.1141 sec/batch\n", "Epoch: 16/20... Training Step: 7401... Training loss: 0.8898... 0.1220 sec/batch\n", "Epoch: 16/20... Training Step: 7402... Training loss: 0.9373... 0.1180 sec/batch\n", "Epoch: 16/20... Training Step: 7403... Training loss: 0.9424... 0.1145 sec/batch\n", "Epoch: 16/20... Training Step: 7404... Training loss: 0.9334... 0.1184 sec/batch\n", "Epoch: 16/20... Training Step: 7405... Training loss: 0.9794... 0.1311 sec/batch\n", "Epoch: 16/20... Training Step: 7406... Training loss: 0.8790... 0.1259 sec/batch\n", "Epoch: 16/20... Training Step: 7407... Training loss: 1.0081... 0.1206 sec/batch\n", "Epoch: 16/20... Training Step: 7408... Training loss: 0.8851... 0.1200 sec/batch\n", "Epoch: 16/20... Training Step: 7409... Training loss: 1.0115... 0.1204 sec/batch\n", "Epoch: 16/20... Training Step: 7410... Training loss: 1.0029... 0.1269 sec/batch\n", "Epoch: 16/20... Training Step: 7411... Training loss: 0.7710... 0.1273 sec/batch\n", "Epoch: 16/20... Training Step: 7412... Training loss: 0.9725... 0.1207 sec/batch\n", "Epoch: 16/20... Training Step: 7413... 
Training loss: 0.8879... 0.1260 sec/batch\n", "Epoch: 16/20... Training Step: 7414... Training loss: 1.0535... 0.1238 sec/batch\n", "Epoch: 16/20... Training Step: 7415... Training loss: 0.9410... 0.1201 sec/batch\n", "Epoch: 16/20... Training Step: 7416... Training loss: 0.8912... 0.1203 sec/batch\n", "Epoch: 16/20... Training Step: 7417... Training loss: 1.0432... 0.1255 sec/batch\n", "Epoch: 16/20... Training Step: 7418... Training loss: 0.8465... 0.1327 sec/batch\n", "Epoch: 16/20... Training Step: 7419... Training loss: 0.8933... 0.1288 sec/batch\n", "Epoch: 16/20... Training Step: 7420... Training loss: 0.9438... 0.1233 sec/batch\n", "Epoch: 16/20... Training Step: 7421... Training loss: 0.9202... 0.1296 sec/batch\n", "Epoch: 16/20... Training Step: 7422... Training loss: 0.8877... 0.1273 sec/batch\n", "Epoch: 16/20... Training Step: 7423... Training loss: 0.9089... 0.1278 sec/batch\n", "Epoch: 16/20... Training Step: 7424... Training loss: 0.9856... 0.1247 sec/batch\n", "Epoch: 17/20... Training Step: 7425... Training loss: 1.0333... 0.1220 sec/batch\n", "Epoch: 17/20... Training Step: 7426... Training loss: 1.1775... 0.1244 sec/batch\n", "Epoch: 17/20... Training Step: 7427... Training loss: 0.9914... 0.1266 sec/batch\n", "Epoch: 17/20... Training Step: 7428... Training loss: 0.9352... 0.1262 sec/batch\n", "Epoch: 17/20... Training Step: 7429... Training loss: 1.0677... 0.1172 sec/batch\n", "Epoch: 17/20... Training Step: 7430... Training loss: 0.9514... 0.1308 sec/batch\n", "Epoch: 17/20... Training Step: 7431... Training loss: 1.0281... 0.1258 sec/batch\n", "Epoch: 17/20... Training Step: 7432... Training loss: 0.9520... 0.1268 sec/batch\n", "Epoch: 17/20... Training Step: 7433... Training loss: 0.8031... 0.1223 sec/batch\n", "Epoch: 17/20... Training Step: 7434... Training loss: 0.9197... 0.1221 sec/batch\n", "Epoch: 17/20... Training Step: 7435... Training loss: 0.9625... 0.1261 sec/batch\n", "Epoch: 17/20... Training Step: 7436... 
Training loss: 0.8654... 0.1219 sec/batch\n", "Epoch: 17/20... Training Step: 7437... Training loss: 1.1076... 0.1271 sec/batch\n", "Epoch: 17/20... Training Step: 7438... Training loss: 0.8309... 0.1173 sec/batch\n", "Epoch: 17/20... Training Step: 7439... Training loss: 1.0039... 0.1215 sec/batch\n", "Epoch: 17/20... Training Step: 7440... Training loss: 1.0778... 0.1243 sec/batch\n", "Epoch: 17/20... Training Step: 7441... Training loss: 0.9140... 0.1210 sec/batch\n", "Epoch: 17/20... Training Step: 7442... Training loss: 0.8599... 0.1175 sec/batch\n", "Epoch: 17/20... Training Step: 7443... Training loss: 0.9571... 0.1181 sec/batch\n", "Epoch: 17/20... Training Step: 7444... Training loss: 0.8579... 0.1194 sec/batch\n", "Epoch: 17/20... Training Step: 7445... Training loss: 1.0841... 0.1180 sec/batch\n", "Epoch: 17/20... Training Step: 7446... Training loss: 0.9871... 0.1148 sec/batch\n", "Epoch: 17/20... Training Step: 7447... Training loss: 0.9422... 0.1233 sec/batch\n", "Epoch: 17/20... Training Step: 7448... Training loss: 1.0039... 0.1142 sec/batch\n", "Epoch: 17/20... Training Step: 7449... Training loss: 0.9928... 0.1170 sec/batch\n", "Epoch: 17/20... Training Step: 7450... Training loss: 1.0169... 0.1189 sec/batch\n", "Epoch: 17/20... Training Step: 7451... Training loss: 0.9827... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7452... Training loss: 0.9841... 0.1193 sec/batch\n", "Epoch: 17/20... Training Step: 7453... Training loss: 0.9129... 0.1175 sec/batch\n", "Epoch: 17/20... Training Step: 7454... Training loss: 1.0231... 0.1215 sec/batch\n", "Epoch: 17/20... Training Step: 7455... Training loss: 0.9552... 0.1184 sec/batch\n", "Epoch: 17/20... Training Step: 7456... Training loss: 0.8649... 0.1169 sec/batch\n", "Epoch: 17/20... Training Step: 7457... Training loss: 0.8092... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7458... Training loss: 0.9689... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7459... 
Training loss: 0.9043... 0.1220 sec/batch\n", "Epoch: 17/20... Training Step: 7460... Training loss: 0.9120... 0.1142 sec/batch\n", "Epoch: 17/20... Training Step: 7461... Training loss: 0.9656... 0.1165 sec/batch\n", "Epoch: 17/20... Training Step: 7462... Training loss: 0.8794... 0.1206 sec/batch\n", "Epoch: 17/20... Training Step: 7463... Training loss: 0.9485... 0.1180 sec/batch\n", "Epoch: 17/20... Training Step: 7464... Training loss: 1.2767... 0.1182 sec/batch\n", "Epoch: 17/20... Training Step: 7465... Training loss: 0.8201... 0.1178 sec/batch\n", "Epoch: 17/20... Training Step: 7466... Training loss: 0.8500... 0.1212 sec/batch\n", "Epoch: 17/20... Training Step: 7467... Training loss: 1.1227... 0.1184 sec/batch\n", "Epoch: 17/20... Training Step: 7468... Training loss: 0.7817... 0.1165 sec/batch\n", "Epoch: 17/20... Training Step: 7469... Training loss: 0.9291... 0.1159 sec/batch\n", "Epoch: 17/20... Training Step: 7470... Training loss: 0.9822... 0.1169 sec/batch\n", "Epoch: 17/20... Training Step: 7471... Training loss: 1.1247... 0.1154 sec/batch\n", "Epoch: 17/20... Training Step: 7472... Training loss: 1.0248... 0.1177 sec/batch\n", "Epoch: 17/20... Training Step: 7473... Training loss: 0.8430... 0.1199 sec/batch\n", "Epoch: 17/20... Training Step: 7474... Training loss: 0.9110... 0.1227 sec/batch\n", "Epoch: 17/20... Training Step: 7475... Training loss: 0.9535... 0.1184 sec/batch\n", "Epoch: 17/20... Training Step: 7476... Training loss: 0.8898... 0.1205 sec/batch\n", "Epoch: 17/20... Training Step: 7477... Training loss: 0.8979... 0.1208 sec/batch\n", "Epoch: 17/20... Training Step: 7478... Training loss: 0.8340... 0.1216 sec/batch\n", "Epoch: 17/20... Training Step: 7479... Training loss: 0.8538... 0.1209 sec/batch\n", "Epoch: 17/20... Training Step: 7480... Training loss: 0.8215... 0.1163 sec/batch\n", "Epoch: 17/20... Training Step: 7481... Training loss: 0.8283... 0.1212 sec/batch\n", "Epoch: 17/20... Training Step: 7482... 
Training loss: 1.0017... 0.1181 sec/batch\n", "Epoch: 17/20... Training Step: 7483... Training loss: 0.9329... 0.1187 sec/batch\n", "Epoch: 17/20... Training Step: 7484... Training loss: 0.9490... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7485... Training loss: 0.8781... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7486... Training loss: 1.1409... 0.1158 sec/batch\n", "Epoch: 17/20... Training Step: 7487... Training loss: 0.8490... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7488... Training loss: 1.0230... 0.1165 sec/batch\n", "Epoch: 17/20... Training Step: 7489... Training loss: 0.8984... 0.1154 sec/batch\n", "Epoch: 17/20... Training Step: 7490... Training loss: 0.9909... 0.1194 sec/batch\n", "Epoch: 17/20... Training Step: 7491... Training loss: 0.9871... 0.1216 sec/batch\n", "Epoch: 17/20... Training Step: 7492... Training loss: 1.0095... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7493... Training loss: 0.8722... 0.1160 sec/batch\n", "Epoch: 17/20... Training Step: 7494... Training loss: 1.0246... 0.1218 sec/batch\n", "Epoch: 17/20... Training Step: 7495... Training loss: 1.1678... 0.1153 sec/batch\n", "Epoch: 17/20... Training Step: 7496... Training loss: 0.7873... 0.1183 sec/batch\n", "Epoch: 17/20... Training Step: 7497... Training loss: 0.9370... 0.1182 sec/batch\n", "Epoch: 17/20... Training Step: 7498... Training loss: 0.7969... 0.1179 sec/batch\n", "Epoch: 17/20... Training Step: 7499... Training loss: 1.0537... 0.1205 sec/batch\n", "Epoch: 17/20... Training Step: 7500... Training loss: 0.8932... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7501... Training loss: 0.9333... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7502... Training loss: 0.9720... 0.1200 sec/batch\n", "Epoch: 17/20... Training Step: 7503... Training loss: 0.9630... 0.1212 sec/batch\n", "Epoch: 17/20... Training Step: 7504... Training loss: 0.8621... 0.1142 sec/batch\n", "Epoch: 17/20... Training Step: 7505... 
Training loss: 1.0115... 0.1167 sec/batch\n", "Epoch: 17/20... Training Step: 7506... Training loss: 1.1031... 0.1188 sec/batch\n", "Epoch: 17/20... Training Step: 7507... Training loss: 0.8903... 0.1154 sec/batch\n", "Epoch: 17/20... Training Step: 7508... Training loss: 1.0580... 0.1232 sec/batch\n", "Epoch: 17/20... Training Step: 7509... Training loss: 0.9510... 0.1159 sec/batch\n", "Epoch: 17/20... Training Step: 7510... Training loss: 1.0550... 0.1162 sec/batch\n", "Epoch: 17/20... Training Step: 7511... Training loss: 0.8593... 0.1167 sec/batch\n", "Epoch: 17/20... Training Step: 7512... Training loss: 1.0617... 0.1196 sec/batch\n", "Epoch: 17/20... Training Step: 7513... Training loss: 0.9661... 0.1198 sec/batch\n", "Epoch: 17/20... Training Step: 7514... Training loss: 0.9131... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7515... Training loss: 1.0762... 0.1183 sec/batch\n", "Epoch: 17/20... Training Step: 7516... Training loss: 1.0090... 0.1205 sec/batch\n", "Epoch: 17/20... Training Step: 7517... Training loss: 0.9239... 0.1211 sec/batch\n", "Epoch: 17/20... Training Step: 7518... Training loss: 1.0661... 0.1188 sec/batch\n", "Epoch: 17/20... Training Step: 7519... Training loss: 0.9472... 0.1161 sec/batch\n", "Epoch: 17/20... Training Step: 7520... Training loss: 0.8851... 0.1160 sec/batch\n", "Epoch: 17/20... Training Step: 7521... Training loss: 1.0625... 0.1194 sec/batch\n", "Epoch: 17/20... Training Step: 7522... Training loss: 1.0531... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7523... Training loss: 1.0132... 0.1201 sec/batch\n", "Epoch: 17/20... Training Step: 7524... Training loss: 1.0897... 0.1235 sec/batch\n", "Epoch: 17/20... Training Step: 7525... Training loss: 0.9534... 0.1158 sec/batch\n", "Epoch: 17/20... Training Step: 7526... Training loss: 1.0157... 0.1130 sec/batch\n", "Epoch: 17/20... Training Step: 7527... Training loss: 1.0468... 0.1164 sec/batch\n", "Epoch: 17/20... Training Step: 7528... 
Training loss: 1.0618... 0.1160 sec/batch\n", "Epoch: 17/20... Training Step: 7529... Training loss: 1.0175... 0.1183 sec/batch\n", "Epoch: 17/20... Training Step: 7530... Training loss: 1.0706... 0.1178 sec/batch\n", "Epoch: 17/20... Training Step: 7531... Training loss: 0.7300... 0.1151 sec/batch\n", "Epoch: 17/20... Training Step: 7532... Training loss: 1.0010... 0.1157 sec/batch\n", "Epoch: 17/20... Training Step: 7533... Training loss: 1.0156... 0.1206 sec/batch\n", "Epoch: 17/20... Training Step: 7534... Training loss: 0.9551... 0.1145 sec/batch\n", "Epoch: 17/20... Training Step: 7535... Training loss: 0.9636... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7536... Training loss: 0.9775... 0.1210 sec/batch\n", "Epoch: 17/20... Training Step: 7537... Training loss: 0.9876... 0.1163 sec/batch\n", "Epoch: 17/20... Training Step: 7538... Training loss: 0.9509... 0.1184 sec/batch\n", "Epoch: 17/20... Training Step: 7539... Training loss: 0.9307... 0.1219 sec/batch\n", "Epoch: 17/20... Training Step: 7540... Training loss: 0.9075... 0.1214 sec/batch\n", "Epoch: 17/20... Training Step: 7541... Training loss: 0.9341... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7542... Training loss: 0.8804... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7543... Training loss: 0.9352... 0.1235 sec/batch\n", "Epoch: 17/20... Training Step: 7544... Training loss: 0.8452... 0.1187 sec/batch\n", "Epoch: 17/20... Training Step: 7545... Training loss: 0.9517... 0.1157 sec/batch\n", "Epoch: 17/20... Training Step: 7546... Training loss: 1.0174... 0.1205 sec/batch\n", "Epoch: 17/20... Training Step: 7547... Training loss: 1.0147... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7548... Training loss: 0.9275... 0.1180 sec/batch\n", "Epoch: 17/20... Training Step: 7549... Training loss: 1.0263... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7550... Training loss: 0.8176... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7551... 
Training loss: 0.9501... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7552... Training loss: 1.0168... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7553... Training loss: 1.0768... 0.1165 sec/batch\n", "Epoch: 17/20... Training Step: 7554... Training loss: 0.8834... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7555... Training loss: 1.0707... 0.1167 sec/batch\n", "Epoch: 17/20... Training Step: 7556... Training loss: 0.9973... 0.1155 sec/batch\n", "Epoch: 17/20... Training Step: 7557... Training loss: 0.9818... 0.1221 sec/batch\n", "Epoch: 17/20... Training Step: 7558... Training loss: 1.0164... 0.1209 sec/batch\n", "Epoch: 17/20... Training Step: 7559... Training loss: 0.9536... 0.1183 sec/batch\n", "Epoch: 17/20... Training Step: 7560... Training loss: 0.8337... 0.1149 sec/batch\n", "Epoch: 17/20... Training Step: 7561... Training loss: 0.7724... 0.1192 sec/batch\n", "Epoch: 17/20... Training Step: 7562... Training loss: 0.9340... 0.1217 sec/batch\n", "Epoch: 17/20... Training Step: 7563... Training loss: 0.8227... 0.1179 sec/batch\n", "Epoch: 17/20... Training Step: 7564... Training loss: 0.7714... 0.1182 sec/batch\n", "Epoch: 17/20... Training Step: 7565... Training loss: 0.7822... 0.1154 sec/batch\n", "Epoch: 17/20... Training Step: 7566... Training loss: 0.8526... 0.1178 sec/batch\n", "Epoch: 17/20... Training Step: 7567... Training loss: 0.9067... 0.1173 sec/batch\n", "Epoch: 17/20... Training Step: 7568... Training loss: 0.9889... 0.1184 sec/batch\n", "Epoch: 17/20... Training Step: 7569... Training loss: 1.0468... 0.1177 sec/batch\n", "Epoch: 17/20... Training Step: 7570... Training loss: 0.8339... 0.1187 sec/batch\n", "Epoch: 17/20... Training Step: 7571... Training loss: 0.9876... 0.1184 sec/batch\n", "Epoch: 17/20... Training Step: 7572... Training loss: 0.8769... 0.1224 sec/batch\n", "Epoch: 17/20... Training Step: 7573... Training loss: 0.8354... 0.1141 sec/batch\n", "Epoch: 17/20... Training Step: 7574... 
Training loss: 0.9154... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7575... Training loss: 0.8165... 0.1219 sec/batch\n", "Epoch: 17/20... Training Step: 7576... Training loss: 1.0832... 0.1199 sec/batch\n", "Epoch: 17/20... Training Step: 7577... Training loss: 1.0626... 0.1166 sec/batch\n", "Epoch: 17/20... Training Step: 7578... Training loss: 1.0009... 0.1181 sec/batch\n", "Epoch: 17/20... Training Step: 7579... Training loss: 0.8916... 0.1199 sec/batch\n", "Epoch: 17/20... Training Step: 7580... Training loss: 0.8597... 0.1212 sec/batch\n", "Epoch: 17/20... Training Step: 7581... Training loss: 1.0149... 0.1188 sec/batch\n", "Epoch: 17/20... Training Step: 7582... Training loss: 0.9623... 0.1242 sec/batch\n", "Epoch: 17/20... Training Step: 7583... Training loss: 0.8451... 0.1219 sec/batch\n", "Epoch: 17/20... Training Step: 7584... Training loss: 0.8855... 0.1187 sec/batch\n", "Epoch: 17/20... Training Step: 7585... Training loss: 0.9494... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7586... Training loss: 0.9350... 0.1203 sec/batch\n", "Epoch: 17/20... Training Step: 7587... Training loss: 1.0929... 0.1216 sec/batch\n", "Epoch: 17/20... Training Step: 7588... Training loss: 0.8620... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7589... Training loss: 0.9157... 0.1166 sec/batch\n", "Epoch: 17/20... Training Step: 7590... Training loss: 1.0346... 0.1166 sec/batch\n", "Epoch: 17/20... Training Step: 7591... Training loss: 0.7260... 0.1230 sec/batch\n", "Epoch: 17/20... Training Step: 7592... Training loss: 1.1059... 0.1165 sec/batch\n", "Epoch: 17/20... Training Step: 7593... Training loss: 0.8651... 0.1192 sec/batch\n", "Epoch: 17/20... Training Step: 7594... Training loss: 0.8713... 0.1210 sec/batch\n", "Epoch: 17/20... Training Step: 7595... Training loss: 1.0930... 0.1181 sec/batch\n", "Epoch: 17/20... Training Step: 7596... Training loss: 1.0123... 0.1137 sec/batch\n", "Epoch: 17/20... Training Step: 7597... 
Training loss: 0.8744... 0.1164 sec/batch\n", "Epoch: 17/20... Training Step: 7598... Training loss: 0.9491... 0.1181 sec/batch\n", "Epoch: 17/20... Training Step: 7599... Training loss: 1.0495... 0.1183 sec/batch\n", "Epoch: 17/20... Training Step: 7600... Training loss: 0.9545... 0.1193 sec/batch\n", "Epoch: 17/20... Training Step: 7601... Training loss: 0.8928... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7602... Training loss: 1.0662... 0.1163 sec/batch\n", "Epoch: 17/20... Training Step: 7603... Training loss: 0.7664... 0.1240 sec/batch\n", "Epoch: 17/20... Training Step: 7604... Training loss: 1.0224... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7605... Training loss: 0.7197... 0.1161 sec/batch\n", "Epoch: 17/20... Training Step: 7606... Training loss: 1.1342... 0.1157 sec/batch\n", "Epoch: 17/20... Training Step: 7607... Training loss: 1.0307... 0.1183 sec/batch\n", "Epoch: 17/20... Training Step: 7608... Training loss: 0.9343... 0.1167 sec/batch\n", "Epoch: 17/20... Training Step: 7609... Training loss: 0.9865... 0.1215 sec/batch\n", "Epoch: 17/20... Training Step: 7610... Training loss: 1.0059... 0.1162 sec/batch\n", "Epoch: 17/20... Training Step: 7611... Training loss: 0.9068... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7612... Training loss: 0.8240... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7613... Training loss: 1.0441... 0.1164 sec/batch\n", "Epoch: 17/20... Training Step: 7614... Training loss: 0.8881... 0.1155 sec/batch\n", "Epoch: 17/20... Training Step: 7615... Training loss: 0.9123... 0.1178 sec/batch\n", "Epoch: 17/20... Training Step: 7616... Training loss: 0.9527... 0.1125 sec/batch\n", "Epoch: 17/20... Training Step: 7617... Training loss: 0.9239... 0.1192 sec/batch\n", "Epoch: 17/20... Training Step: 7618... Training loss: 0.8655... 0.1207 sec/batch\n", "Epoch: 17/20... Training Step: 7619... Training loss: 0.9059... 0.1188 sec/batch\n", "Epoch: 17/20... Training Step: 7620... 
Training loss: 1.0119... 0.1210 sec/batch\n", "Epoch: 17/20... Training Step: 7621... Training loss: 0.9645... 0.1203 sec/batch\n", "Epoch: 17/20... Training Step: 7622... Training loss: 0.9696... 0.1148 sec/batch\n", "Epoch: 17/20... Training Step: 7623... Training loss: 0.7758... 0.1192 sec/batch\n", "Epoch: 17/20... Training Step: 7624... Training loss: 0.8919... 0.1176 sec/batch\n", "Epoch: 17/20... Training Step: 7625... Training loss: 1.0184... 0.1181 sec/batch\n", "Epoch: 17/20... Training Step: 7626... Training loss: 1.0663... 0.1162 sec/batch\n", "Epoch: 17/20... Training Step: 7627... Training loss: 0.9039... 0.1150 sec/batch\n", "Epoch: 17/20... Training Step: 7628... Training loss: 1.1074... 0.1201 sec/batch\n", "Epoch: 17/20... Training Step: 7629... Training loss: 0.7958... 0.1154 sec/batch\n", "Epoch: 17/20... Training Step: 7630... Training loss: 0.9493... 0.1201 sec/batch\n", "Epoch: 17/20... Training Step: 7631... Training loss: 0.8707... 0.1219 sec/batch\n", "Epoch: 17/20... Training Step: 7632... Training loss: 0.9724... 0.1155 sec/batch\n", "Epoch: 17/20... Training Step: 7633... Training loss: 0.9428... 0.1165 sec/batch\n", "Epoch: 17/20... Training Step: 7634... Training loss: 0.7927... 0.1145 sec/batch\n", "Epoch: 17/20... Training Step: 7635... Training loss: 0.9360... 0.1224 sec/batch\n", "Epoch: 17/20... Training Step: 7636... Training loss: 1.0147... 0.1120 sec/batch\n", "Epoch: 17/20... Training Step: 7637... Training loss: 0.9434... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7638... Training loss: 0.8344... 0.1212 sec/batch\n", "Epoch: 17/20... Training Step: 7639... Training loss: 0.9031... 0.1204 sec/batch\n", "Epoch: 17/20... Training Step: 7640... Training loss: 0.9628... 0.1165 sec/batch\n", "Epoch: 17/20... Training Step: 7641... Training loss: 0.9721... 0.1218 sec/batch\n", "Epoch: 17/20... Training Step: 7642... Training loss: 1.0787... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7643... 
Training loss: 1.1125... 0.1138 sec/batch\n", "Epoch: 17/20... Training Step: 7644... Training loss: 0.9989... 0.1145 sec/batch\n", "Epoch: 17/20... Training Step: 7645... Training loss: 0.8999... 0.1239 sec/batch\n", "Epoch: 17/20... Training Step: 7646... Training loss: 1.2745... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7647... Training loss: 1.0430... 0.1196 sec/batch\n", "Epoch: 17/20... Training Step: 7648... Training loss: 1.1943... 0.1157 sec/batch\n", "Epoch: 17/20... Training Step: 7649... Training loss: 1.0093... 0.1144 sec/batch\n", "Epoch: 17/20... Training Step: 7650... Training loss: 1.2891... 0.1198 sec/batch\n", "Epoch: 17/20... Training Step: 7651... Training loss: 1.0854... 0.1125 sec/batch\n", "Epoch: 17/20... Training Step: 7652... Training loss: 0.9130... 0.1216 sec/batch\n", "Epoch: 17/20... Training Step: 7653... Training loss: 1.0676... 0.1166 sec/batch\n", "Epoch: 17/20... Training Step: 7654... Training loss: 0.9209... 0.1145 sec/batch\n", "Epoch: 17/20... Training Step: 7655... Training loss: 0.9432... 0.1258 sec/batch\n", "Epoch: 17/20... Training Step: 7656... Training loss: 1.0736... 0.1207 sec/batch\n", "Epoch: 17/20... Training Step: 7657... Training loss: 1.1605... 0.1197 sec/batch\n", "Epoch: 17/20... Training Step: 7658... Training loss: 0.9957... 0.1172 sec/batch\n", "Epoch: 17/20... Training Step: 7659... Training loss: 0.9871... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7660... Training loss: 0.9200... 0.1134 sec/batch\n", "Epoch: 17/20... Training Step: 7661... Training loss: 1.0561... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7662... Training loss: 0.8203... 0.1156 sec/batch\n", "Epoch: 17/20... Training Step: 7663... Training loss: 0.9567... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7664... Training loss: 0.9583... 0.1212 sec/batch\n", "Epoch: 17/20... Training Step: 7665... Training loss: 0.8857... 0.1217 sec/batch\n", "Epoch: 17/20... Training Step: 7666... 
Training loss: 0.9042... 0.1216 sec/batch\n", "Epoch: 17/20... Training Step: 7667... Training loss: 1.0988... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7668... Training loss: 0.9539... 0.1149 sec/batch\n", "Epoch: 17/20... Training Step: 7669... Training loss: 1.0620... 0.1153 sec/batch\n", "Epoch: 17/20... Training Step: 7670... Training loss: 0.8340... 0.1207 sec/batch\n", "Epoch: 17/20... Training Step: 7671... Training loss: 0.9603... 0.1194 sec/batch\n", "Epoch: 17/20... Training Step: 7672... Training loss: 1.1377... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7673... Training loss: 1.0218... 0.1155 sec/batch\n", "Epoch: 17/20... Training Step: 7674... Training loss: 0.8680... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7675... Training loss: 1.0277... 0.1187 sec/batch\n", "Epoch: 17/20... Training Step: 7676... Training loss: 0.9630... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7677... Training loss: 0.9364... 0.1162 sec/batch\n", "Epoch: 17/20... Training Step: 7678... Training loss: 1.0068... 0.1147 sec/batch\n", "Epoch: 17/20... Training Step: 7679... Training loss: 0.8872... 0.1169 sec/batch\n", "Epoch: 17/20... Training Step: 7680... Training loss: 1.0560... 0.1180 sec/batch\n", "Epoch: 17/20... Training Step: 7681... Training loss: 0.9233... 0.1174 sec/batch\n", "Epoch: 17/20... Training Step: 7682... Training loss: 0.9237... 0.1167 sec/batch\n", "Epoch: 17/20... Training Step: 7683... Training loss: 0.7914... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7684... Training loss: 0.9460... 0.1170 sec/batch\n", "Epoch: 17/20... Training Step: 7685... Training loss: 1.0508... 0.1182 sec/batch\n", "Epoch: 17/20... Training Step: 7686... Training loss: 1.0089... 0.1246 sec/batch\n", "Epoch: 17/20... Training Step: 7687... Training loss: 1.0545... 0.1272 sec/batch\n", "Epoch: 17/20... Training Step: 7688... Training loss: 1.0533... 0.1428 sec/batch\n", "Epoch: 17/20... Training Step: 7689... 
Training loss: 1.0566... 0.1436 sec/batch\n", "Epoch: 17/20... Training Step: 7690... Training loss: 1.0239... 0.1250 sec/batch\n", "Epoch: 17/20... Training Step: 7691... Training loss: 1.0952... 0.1226 sec/batch\n", "Epoch: 17/20... Training Step: 7692... Training loss: 1.1392... 0.1285 sec/batch\n", "Epoch: 17/20... Training Step: 7693... Training loss: 1.0355... 0.1224 sec/batch\n", "Epoch: 17/20... Training Step: 7694... Training loss: 1.0973... 0.1127 sec/batch\n", "Epoch: 17/20... Training Step: 7695... Training loss: 0.9231... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7696... Training loss: 1.1138... 0.1139 sec/batch\n", "Epoch: 17/20... Training Step: 7697... Training loss: 1.1840... 0.1154 sec/batch\n", "Epoch: 17/20... Training Step: 7698... Training loss: 1.0642... 0.1183 sec/batch\n", "Epoch: 17/20... Training Step: 7699... Training loss: 1.0121... 0.1218 sec/batch\n", "Epoch: 17/20... Training Step: 7700... Training loss: 0.9120... 0.1706 sec/batch\n", "Epoch: 17/20... Training Step: 7701... Training loss: 0.9362... 0.1575 sec/batch\n", "Epoch: 17/20... Training Step: 7702... Training loss: 1.1631... 0.1387 sec/batch\n", "Epoch: 17/20... Training Step: 7703... Training loss: 0.9457... 0.1363 sec/batch\n", "Epoch: 17/20... Training Step: 7704... Training loss: 0.8650... 0.1383 sec/batch\n", "Epoch: 17/20... Training Step: 7705... Training loss: 0.9386... 0.1288 sec/batch\n", "Epoch: 17/20... Training Step: 7706... Training loss: 1.1486... 0.1293 sec/batch\n", "Epoch: 17/20... Training Step: 7707... Training loss: 0.9780... 0.1361 sec/batch\n", "Epoch: 17/20... Training Step: 7708... Training loss: 1.0553... 0.1321 sec/batch\n", "Epoch: 17/20... Training Step: 7709... Training loss: 0.8693... 0.1198 sec/batch\n", "Epoch: 17/20... Training Step: 7710... Training loss: 0.9172... 0.1239 sec/batch\n", "Epoch: 17/20... Training Step: 7711... Training loss: 1.0286... 0.1208 sec/batch\n", "Epoch: 17/20... Training Step: 7712... 
Training loss: 1.0101... 0.1175 sec/batch\n", "Epoch: 17/20... Training Step: 7713... Training loss: 0.9967... 0.1129 sec/batch\n", "Epoch: 17/20... Training Step: 7714... Training loss: 1.0746... 0.1134 sec/batch\n", "Epoch: 17/20... Training Step: 7715... Training loss: 1.0987... 0.1136 sec/batch\n", "Epoch: 17/20... Training Step: 7716... Training loss: 0.9198... 0.1126 sec/batch\n", "Epoch: 17/20... Training Step: 7717... Training loss: 0.9158... 0.1145 sec/batch\n", "Epoch: 17/20... Training Step: 7718... Training loss: 0.9746... 0.1147 sec/batch\n", "Epoch: 17/20... Training Step: 7719... Training loss: 0.9864... 0.1162 sec/batch\n", "Epoch: 17/20... Training Step: 7720... Training loss: 1.1914... 0.1141 sec/batch\n", "Epoch: 17/20... Training Step: 7721... Training loss: 0.8432... 0.1139 sec/batch\n", "Epoch: 17/20... Training Step: 7722... Training loss: 0.8635... 0.1166 sec/batch\n", "Epoch: 17/20... Training Step: 7723... Training loss: 0.9918... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7724... Training loss: 1.0455... 0.1153 sec/batch\n", "Epoch: 17/20... Training Step: 7725... Training loss: 1.0136... 0.1135 sec/batch\n", "Epoch: 17/20... Training Step: 7726... Training loss: 0.9810... 0.1159 sec/batch\n", "Epoch: 17/20... Training Step: 7727... Training loss: 0.8493... 0.1177 sec/batch\n", "Epoch: 17/20... Training Step: 7728... Training loss: 1.2489... 0.1187 sec/batch\n", "Epoch: 17/20... Training Step: 7729... Training loss: 0.8705... 0.1209 sec/batch\n", "Epoch: 17/20... Training Step: 7730... Training loss: 0.9788... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7731... Training loss: 1.0159... 0.1163 sec/batch\n", "Epoch: 17/20... Training Step: 7732... Training loss: 1.1089... 0.1208 sec/batch\n", "Epoch: 17/20... Training Step: 7733... Training loss: 1.0211... 0.1265 sec/batch\n", "Epoch: 17/20... Training Step: 7734... Training loss: 1.0765... 0.1226 sec/batch\n", "Epoch: 17/20... Training Step: 7735... 
Training loss: 0.9183... 0.1194 sec/batch\n", "Epoch: 17/20... Training Step: 7736... Training loss: 0.8194... 0.1155 sec/batch\n", "Epoch: 17/20... Training Step: 7737... Training loss: 0.9145... 0.1215 sec/batch\n", "Epoch: 17/20... Training Step: 7738... Training loss: 0.9098... 0.1172 sec/batch\n", "Epoch: 17/20... Training Step: 7739... Training loss: 0.8095... 0.1169 sec/batch\n", "Epoch: 17/20... Training Step: 7740... Training loss: 0.8436... 0.1200 sec/batch\n", "Epoch: 17/20... Training Step: 7741... Training loss: 0.8461... 0.1174 sec/batch\n", "Epoch: 17/20... Training Step: 7742... Training loss: 0.8953... 0.1204 sec/batch\n", "Epoch: 17/20... Training Step: 7743... Training loss: 0.9596... 0.1222 sec/batch\n", "Epoch: 17/20... Training Step: 7744... Training loss: 0.8824... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7745... Training loss: 0.8691... 0.1195 sec/batch\n", "Epoch: 17/20... Training Step: 7746... Training loss: 1.0833... 0.1189 sec/batch\n", "Epoch: 17/20... Training Step: 7747... Training loss: 0.8606... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7748... Training loss: 0.9145... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7749... Training loss: 0.9241... 0.1160 sec/batch\n", "Epoch: 17/20... Training Step: 7750... Training loss: 0.8388... 0.1202 sec/batch\n", "Epoch: 17/20... Training Step: 7751... Training loss: 0.9148... 0.1189 sec/batch\n", "Epoch: 17/20... Training Step: 7752... Training loss: 0.9581... 0.1130 sec/batch\n", "Epoch: 17/20... Training Step: 7753... Training loss: 1.0082... 0.1157 sec/batch\n", "Epoch: 17/20... Training Step: 7754... Training loss: 0.8972... 0.1152 sec/batch\n", "Epoch: 17/20... Training Step: 7755... Training loss: 0.9282... 0.1137 sec/batch\n", "Epoch: 17/20... Training Step: 7756... Training loss: 1.0269... 0.1169 sec/batch\n", "Epoch: 17/20... Training Step: 7757... Training loss: 0.8683... 0.1178 sec/batch\n", "Epoch: 17/20... Training Step: 7758... 
Training loss: 0.9311... 0.1192 sec/batch\n", "Epoch: 17/20... Training Step: 7759... Training loss: 0.9769... 0.1162 sec/batch\n", "Epoch: 17/20... Training Step: 7760... Training loss: 0.9495... 0.1195 sec/batch\n", "Epoch: 17/20... Training Step: 7761... Training loss: 0.7861... 0.1204 sec/batch\n", "Epoch: 17/20... Training Step: 7762... Training loss: 0.8909... 0.1193 sec/batch\n", "Epoch: 17/20... Training Step: 7763... Training loss: 0.9373... 0.1208 sec/batch\n", "Epoch: 17/20... Training Step: 7764... Training loss: 1.0478... 0.1162 sec/batch\n", "Epoch: 17/20... Training Step: 7765... Training loss: 0.9689... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7766... Training loss: 1.0228... 0.1180 sec/batch\n", "Epoch: 17/20... Training Step: 7767... Training loss: 0.7975... 0.1189 sec/batch\n", "Epoch: 17/20... Training Step: 7768... Training loss: 0.9392... 0.1176 sec/batch\n", "Epoch: 17/20... Training Step: 7769... Training loss: 0.9383... 0.1215 sec/batch\n", "Epoch: 17/20... Training Step: 7770... Training loss: 0.9312... 0.1169 sec/batch\n", "Epoch: 17/20... Training Step: 7771... Training loss: 0.9745... 0.1146 sec/batch\n", "Epoch: 17/20... Training Step: 7772... Training loss: 1.0195... 0.1198 sec/batch\n", "Epoch: 17/20... Training Step: 7773... Training loss: 0.9749... 0.1172 sec/batch\n", "Epoch: 17/20... Training Step: 7774... Training loss: 0.9201... 0.1195 sec/batch\n", "Epoch: 17/20... Training Step: 7775... Training loss: 1.0011... 0.1173 sec/batch\n", "Epoch: 17/20... Training Step: 7776... Training loss: 0.9899... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7777... Training loss: 0.9441... 0.1151 sec/batch\n", "Epoch: 17/20... Training Step: 7778... Training loss: 0.6978... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7779... Training loss: 1.1155... 0.1148 sec/batch\n", "Epoch: 17/20... Training Step: 7780... Training loss: 0.9921... 0.1220 sec/batch\n", "Epoch: 17/20... Training Step: 7781... 
Training loss: 0.8366... 0.1205 sec/batch\n", "Epoch: 17/20... Training Step: 7782... Training loss: 0.9573... 0.1223 sec/batch\n", "Epoch: 17/20... Training Step: 7783... Training loss: 1.0737... 0.1164 sec/batch\n", "Epoch: 17/20... Training Step: 7784... Training loss: 0.7820... 0.1155 sec/batch\n", "Epoch: 17/20... Training Step: 7785... Training loss: 1.0242... 0.1161 sec/batch\n", "Epoch: 17/20... Training Step: 7786... Training loss: 0.9245... 0.1199 sec/batch\n", "Epoch: 17/20... Training Step: 7787... Training loss: 0.7734... 0.1200 sec/batch\n", "Epoch: 17/20... Training Step: 7788... Training loss: 0.9467... 0.1205 sec/batch\n", "Epoch: 17/20... Training Step: 7789... Training loss: 0.9479... 0.1202 sec/batch\n", "Epoch: 17/20... Training Step: 7790... Training loss: 0.9764... 0.1163 sec/batch\n", "Epoch: 17/20... Training Step: 7791... Training loss: 1.0488... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7792... Training loss: 1.0427... 0.1204 sec/batch\n", "Epoch: 17/20... Training Step: 7793... Training loss: 1.0280... 0.1125 sec/batch\n", "Epoch: 17/20... Training Step: 7794... Training loss: 0.9535... 0.1208 sec/batch\n", "Epoch: 17/20... Training Step: 7795... Training loss: 0.7960... 0.1200 sec/batch\n", "Epoch: 17/20... Training Step: 7796... Training loss: 1.1333... 0.1203 sec/batch\n", "Epoch: 17/20... Training Step: 7797... Training loss: 0.8023... 0.1166 sec/batch\n", "Epoch: 17/20... Training Step: 7798... Training loss: 0.9380... 0.1126 sec/batch\n", "Epoch: 17/20... Training Step: 7799... Training loss: 0.9718... 0.1179 sec/batch\n", "Epoch: 17/20... Training Step: 7800... Training loss: 1.1013... 0.1158 sec/batch\n", "Epoch: 17/20... Training Step: 7801... Training loss: 1.0058... 0.1218 sec/batch\n", "Epoch: 17/20... Training Step: 7802... Training loss: 0.9654... 0.1194 sec/batch\n", "Epoch: 17/20... Training Step: 7803... Training loss: 1.0426... 0.1166 sec/batch\n", "Epoch: 17/20... Training Step: 7804... 
Training loss: 1.0371... 0.1205 sec/batch\n", "Epoch: 17/20... Training Step: 7805... Training loss: 0.8512... 0.1136 sec/batch\n", "Epoch: 17/20... Training Step: 7806... Training loss: 0.9506... 0.1199 sec/batch\n", "Epoch: 17/20... Training Step: 7807... Training loss: 0.8110... 0.1200 sec/batch\n", "Epoch: 17/20... Training Step: 7808... Training loss: 1.0672... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7809... Training loss: 0.8601... 0.1158 sec/batch\n", "Epoch: 17/20... Training Step: 7810... Training loss: 0.9556... 0.1154 sec/batch\n", "Epoch: 17/20... Training Step: 7811... Training loss: 0.9254... 0.1162 sec/batch\n", "Epoch: 17/20... Training Step: 7812... Training loss: 0.9630... 0.1200 sec/batch\n", "Epoch: 17/20... Training Step: 7813... Training loss: 0.8986... 0.1219 sec/batch\n", "Epoch: 17/20... Training Step: 7814... Training loss: 0.8760... 0.1173 sec/batch\n", "Epoch: 17/20... Training Step: 7815... Training loss: 0.9856... 0.1175 sec/batch\n", "Epoch: 17/20... Training Step: 7816... Training loss: 0.9426... 0.1181 sec/batch\n", "Epoch: 17/20... Training Step: 7817... Training loss: 1.0794... 0.1173 sec/batch\n", "Epoch: 17/20... Training Step: 7818... Training loss: 0.9432... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7819... Training loss: 0.8393... 0.1183 sec/batch\n", "Epoch: 17/20... Training Step: 7820... Training loss: 1.0847... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7821... Training loss: 0.9931... 0.1146 sec/batch\n", "Epoch: 17/20... Training Step: 7822... Training loss: 1.0252... 0.1157 sec/batch\n", "Epoch: 17/20... Training Step: 7823... Training loss: 1.0269... 0.1188 sec/batch\n", "Epoch: 17/20... Training Step: 7824... Training loss: 1.0074... 0.1141 sec/batch\n", "Epoch: 17/20... Training Step: 7825... Training loss: 0.9165... 0.1132 sec/batch\n", "Epoch: 17/20... Training Step: 7826... Training loss: 0.9436... 0.1163 sec/batch\n", "Epoch: 17/20... Training Step: 7827... 
Training loss: 0.8004... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7828... Training loss: 1.0844... 0.1195 sec/batch\n", "Epoch: 17/20... Training Step: 7829... Training loss: 1.0157... 0.1155 sec/batch\n", "Epoch: 17/20... Training Step: 7830... Training loss: 1.0270... 0.1174 sec/batch\n", "Epoch: 17/20... Training Step: 7831... Training loss: 0.8478... 0.1205 sec/batch\n", "Epoch: 17/20... Training Step: 7832... Training loss: 1.1326... 0.1189 sec/batch\n", "Epoch: 17/20... Training Step: 7833... Training loss: 0.9257... 0.1189 sec/batch\n", "Epoch: 17/20... Training Step: 7834... Training loss: 0.9394... 0.1175 sec/batch\n", "Epoch: 17/20... Training Step: 7835... Training loss: 0.8721... 0.1190 sec/batch\n", "Epoch: 17/20... Training Step: 7836... Training loss: 0.9763... 0.1155 sec/batch\n", "Epoch: 17/20... Training Step: 7837... Training loss: 0.8704... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7838... Training loss: 0.8241... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7839... Training loss: 1.0608... 0.1123 sec/batch\n", "Epoch: 17/20... Training Step: 7840... Training loss: 1.0637... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7841... Training loss: 0.9631... 0.1159 sec/batch\n", "Epoch: 17/20... Training Step: 7842... Training loss: 0.7367... 0.1229 sec/batch\n", "Epoch: 17/20... Training Step: 7843... Training loss: 0.7610... 0.1188 sec/batch\n", "Epoch: 17/20... Training Step: 7844... Training loss: 0.7639... 0.1208 sec/batch\n", "Epoch: 17/20... Training Step: 7845... Training loss: 0.9957... 0.1195 sec/batch\n", "Epoch: 17/20... Training Step: 7846... Training loss: 0.9063... 0.1193 sec/batch\n", "Epoch: 17/20... Training Step: 7847... Training loss: 0.8249... 0.1193 sec/batch\n", "Epoch: 17/20... Training Step: 7848... Training loss: 0.9927... 0.1167 sec/batch\n", "Epoch: 17/20... Training Step: 7849... Training loss: 0.9300... 0.1185 sec/batch\n", "Epoch: 17/20... Training Step: 7850... 
Training loss: 0.9739... 0.1159 sec/batch\n", "Epoch: 17/20... Training Step: 7851... Training loss: 0.9649... 0.1143 sec/batch\n", "Epoch: 17/20... Training Step: 7852... Training loss: 1.0460... 0.1169 sec/batch\n", "Epoch: 17/20... Training Step: 7853... Training loss: 0.7819... 0.1179 sec/batch\n", "Epoch: 17/20... Training Step: 7854... Training loss: 0.8943... 0.1203 sec/batch\n", "Epoch: 17/20... Training Step: 7855... Training loss: 1.1191... 0.1172 sec/batch\n", "Epoch: 17/20... Training Step: 7856... Training loss: 0.8638... 0.1167 sec/batch\n", "Epoch: 17/20... Training Step: 7857... Training loss: 1.0553... 0.1152 sec/batch\n", "Epoch: 17/20... Training Step: 7858... Training loss: 1.1494... 0.1158 sec/batch\n", "Epoch: 17/20... Training Step: 7859... Training loss: 0.8994... 0.1226 sec/batch\n", "Epoch: 17/20... Training Step: 7860... Training loss: 0.8861... 0.1164 sec/batch\n", "Epoch: 17/20... Training Step: 7861... Training loss: 0.8727... 0.1160 sec/batch\n", "Epoch: 17/20... Training Step: 7862... Training loss: 0.9328... 0.1189 sec/batch\n", "Epoch: 17/20... Training Step: 7863... Training loss: 1.2164... 0.1236 sec/batch\n", "Epoch: 17/20... Training Step: 7864... Training loss: 0.9871... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7865... Training loss: 0.8112... 0.1173 sec/batch\n", "Epoch: 17/20... Training Step: 7866... Training loss: 0.9822... 0.1107 sec/batch\n", "Epoch: 17/20... Training Step: 7867... Training loss: 0.9252... 0.1127 sec/batch\n", "Epoch: 17/20... Training Step: 7868... Training loss: 0.9748... 0.1201 sec/batch\n", "Epoch: 17/20... Training Step: 7869... Training loss: 0.9338... 0.1140 sec/batch\n", "Epoch: 17/20... Training Step: 7870... Training loss: 0.8434... 0.1197 sec/batch\n", "Epoch: 17/20... Training Step: 7871... Training loss: 0.9744... 0.1204 sec/batch\n", "Epoch: 17/20... Training Step: 7872... Training loss: 0.8666... 0.1198 sec/batch\n", "Epoch: 17/20... Training Step: 7873... 
Training loss: 0.9455... 0.1120 sec/batch\n", "Epoch: 17/20... Training Step: 7874... Training loss: 0.9204... 0.1191 sec/batch\n", "Epoch: 17/20... Training Step: 7875... Training loss: 0.8797... 0.1174 sec/batch\n", "Epoch: 17/20... Training Step: 7876... Training loss: 0.9268... 0.1192 sec/batch\n", "Epoch: 17/20... Training Step: 7877... Training loss: 0.8885... 0.1204 sec/batch\n", "Epoch: 17/20... Training Step: 7878... Training loss: 1.0372... 0.1215 sec/batch\n", "Epoch: 17/20... Training Step: 7879... Training loss: 1.0256... 0.1163 sec/batch\n", "Epoch: 17/20... Training Step: 7880... Training loss: 0.8117... 0.1168 sec/batch\n", "Epoch: 17/20... Training Step: 7881... Training loss: 0.9232... 0.1167 sec/batch\n", "Epoch: 17/20... Training Step: 7882... Training loss: 0.7020... 0.1194 sec/batch\n", "Epoch: 17/20... Training Step: 7883... Training loss: 0.7614... 0.1193 sec/batch\n", "Epoch: 17/20... Training Step: 7884... Training loss: 0.8893... 0.1198 sec/batch\n", "Epoch: 17/20... Training Step: 7885... Training loss: 0.9904... 0.1186 sec/batch\n", "Epoch: 17/20... Training Step: 7886... Training loss: 0.9654... 0.1153 sec/batch\n", "Epoch: 17/20... Training Step: 7887... Training loss: 0.9167... 0.1171 sec/batch\n", "Epoch: 17/20... Training Step: 7888... Training loss: 0.8447... 0.1171 sec/batch\n", "Epoch: 18/20... Training Step: 7889... Training loss: 1.1995... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 7890... Training loss: 1.0684... 0.1203 sec/batch\n", "Epoch: 18/20... Training Step: 7891... Training loss: 0.8733... 0.1199 sec/batch\n", "Epoch: 18/20... Training Step: 7892... Training loss: 0.9680... 0.1172 sec/batch\n", "Epoch: 18/20... Training Step: 7893... Training loss: 1.0246... 0.1195 sec/batch\n", "Epoch: 18/20... Training Step: 7894... Training loss: 0.9127... 0.1169 sec/batch\n", "Epoch: 18/20... Training Step: 7895... Training loss: 1.0226... 0.1153 sec/batch\n", "Epoch: 18/20... Training Step: 7896... 
Training loss: 0.9170... 0.1174 sec/batch\n", "Epoch: 18/20... Training Step: 7897... Training loss: 0.8426... 0.1205 sec/batch\n", "Epoch: 18/20... Training Step: 7898... Training loss: 0.9265... 0.1180 sec/batch\n", "Epoch: 18/20... Training Step: 7899... Training loss: 0.8791... 0.1188 sec/batch\n", "Epoch: 18/20... Training Step: 7900... Training loss: 0.7734... 0.1218 sec/batch\n", "Epoch: 18/20... Training Step: 7901... Training loss: 1.2357... 0.1194 sec/batch\n", "Epoch: 18/20... Training Step: 7902... Training loss: 0.8615... 0.1157 sec/batch\n", "Epoch: 18/20... Training Step: 7903... Training loss: 1.0372... 0.1182 sec/batch\n", "Epoch: 18/20... Training Step: 7904... Training loss: 1.0844... 0.1130 sec/batch\n", "Epoch: 18/20... Training Step: 7905... Training loss: 0.8359... 0.1209 sec/batch\n", "Epoch: 18/20... Training Step: 7906... Training loss: 0.9369... 0.1176 sec/batch\n", "Epoch: 18/20... Training Step: 7907... Training loss: 0.9592... 0.1222 sec/batch\n", "Epoch: 18/20... Training Step: 7908... Training loss: 0.8428... 0.1267 sec/batch\n", "Epoch: 18/20... Training Step: 7909... Training loss: 0.9839... 0.1339 sec/batch\n", "Epoch: 18/20... Training Step: 7910... Training loss: 0.9263... 0.1176 sec/batch\n", "Epoch: 18/20... Training Step: 7911... Training loss: 0.9808... 0.1168 sec/batch\n", "Epoch: 18/20... Training Step: 7912... Training loss: 0.8629... 0.1163 sec/batch\n", "Epoch: 18/20... Training Step: 7913... Training loss: 0.8626... 0.1167 sec/batch\n", "Epoch: 18/20... Training Step: 7914... Training loss: 0.9200... 0.1183 sec/batch\n", "Epoch: 18/20... Training Step: 7915... Training loss: 0.9436... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 7916... Training loss: 0.8317... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 7917... Training loss: 0.9107... 0.1169 sec/batch\n", "Epoch: 18/20... Training Step: 7918... Training loss: 0.9756... 0.1193 sec/batch\n", "Epoch: 18/20... Training Step: 7919... 
Training loss: 0.8670... 0.1182 sec/batch\n", "Epoch: 18/20... Training Step: 7920... Training loss: 0.8497... 0.1181 sec/batch\n", "Epoch: 18/20... Training Step: 7921... Training loss: 0.8997... 0.1189 sec/batch\n", "Epoch: 18/20... Training Step: 7922... Training loss: 0.8192... 0.1189 sec/batch\n", "Epoch: 18/20... Training Step: 7923... Training loss: 0.6792... 0.1218 sec/batch\n", "Epoch: 18/20... Training Step: 7924... Training loss: 0.8264... 0.1214 sec/batch\n", "Epoch: 18/20... Training Step: 7925... Training loss: 0.8558... 0.1217 sec/batch\n", "Epoch: 18/20... Training Step: 7926... Training loss: 0.9954... 0.1183 sec/batch\n", "Epoch: 18/20... Training Step: 7927... Training loss: 0.8058... 0.1194 sec/batch\n", "Epoch: 18/20... Training Step: 7928... Training loss: 1.1310... 0.1159 sec/batch\n", "Epoch: 18/20... Training Step: 7929... Training loss: 0.8716... 0.1202 sec/batch\n", "Epoch: 18/20... Training Step: 7930... Training loss: 0.7727... 0.1172 sec/batch\n", "Epoch: 18/20... Training Step: 7931... Training loss: 1.0995... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 7932... Training loss: 0.8041... 0.1169 sec/batch\n", "Epoch: 18/20... Training Step: 7933... Training loss: 0.9710... 0.1166 sec/batch\n", "Epoch: 18/20... Training Step: 7934... Training loss: 0.9638... 0.1168 sec/batch\n", "Epoch: 18/20... Training Step: 7935... Training loss: 1.0008... 0.1169 sec/batch\n", "Epoch: 18/20... Training Step: 7936... Training loss: 1.0090... 0.1210 sec/batch\n", "Epoch: 18/20... Training Step: 7937... Training loss: 0.8667... 0.1159 sec/batch\n", "Epoch: 18/20... Training Step: 7938... Training loss: 0.8430... 0.1201 sec/batch\n", "Epoch: 18/20... Training Step: 7939... Training loss: 0.8686... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 7940... Training loss: 0.9892... 0.1193 sec/batch\n", "Epoch: 18/20... Training Step: 7941... Training loss: 1.0496... 0.1212 sec/batch\n", "Epoch: 18/20... Training Step: 7942... 
Training loss: 0.8455... 0.1210 sec/batch\n", "Epoch: 18/20... Training Step: 7943... Training loss: 0.9363... 0.1207 sec/batch\n", "Epoch: 18/20... Training Step: 7944... Training loss: 0.9014... 0.1214 sec/batch\n", "Epoch: 18/20... Training Step: 7945... Training loss: 0.9003... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 7946... Training loss: 0.9270... 0.1160 sec/batch\n", "Epoch: 18/20... Training Step: 7947... Training loss: 0.7539... 0.1199 sec/batch\n", "Epoch: 18/20... Training Step: 7948... Training loss: 0.8953... 0.1186 sec/batch\n", "Epoch: 18/20... Training Step: 7949... Training loss: 0.8480... 0.1136 sec/batch\n", "Epoch: 18/20... Training Step: 7950... Training loss: 1.0207... 0.1183 sec/batch\n", "Epoch: 18/20... Training Step: 7951... Training loss: 0.7833... 0.1202 sec/batch\n", "Epoch: 18/20... Training Step: 7952... Training loss: 0.9327... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 7953... Training loss: 0.8019... 0.1255 sec/batch\n", "Epoch: 18/20... Training Step: 7954... Training loss: 0.9437... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 7955... Training loss: 0.9194... 0.1215 sec/batch\n", "Epoch: 18/20... Training Step: 7956... Training loss: 0.9374... 0.1151 sec/batch\n", "Epoch: 18/20... Training Step: 7957... Training loss: 0.9134... 0.1169 sec/batch\n", "Epoch: 18/20... Training Step: 7958... Training loss: 0.9952... 0.1154 sec/batch\n", "Epoch: 18/20... Training Step: 7959... Training loss: 1.0159... 0.1186 sec/batch\n", "Epoch: 18/20... Training Step: 7960... Training loss: 0.7809... 0.1135 sec/batch\n", "Epoch: 18/20... Training Step: 7961... Training loss: 0.8349... 0.1204 sec/batch\n", "Epoch: 18/20... Training Step: 7962... Training loss: 0.7444... 0.1170 sec/batch\n", "Epoch: 18/20... Training Step: 7963... Training loss: 0.9804... 0.1161 sec/batch\n", "Epoch: 18/20... Training Step: 7964... Training loss: 0.8149... 0.1181 sec/batch\n", "Epoch: 18/20... Training Step: 7965... 
Training loss: 0.7889... 0.1210 sec/batch\n", "Epoch: 18/20... Training Step: 7966... Training loss: 0.8883... 0.1211 sec/batch\n", "Epoch: 18/20... Training Step: 7967... Training loss: 0.8855... 0.1188 sec/batch\n", "Epoch: 18/20... Training Step: 7968... Training loss: 0.9566... 0.1240 sec/batch\n", "Epoch: 18/20... Training Step: 7969... Training loss: 1.1487... 0.1272 sec/batch\n", "Epoch: 18/20... Training Step: 7970... Training loss: 0.8824... 0.1241 sec/batch\n", "Epoch: 18/20... Training Step: 7971... Training loss: 0.8013... 0.1163 sec/batch\n", "Epoch: 18/20... Training Step: 7972... Training loss: 0.9481... 0.1264 sec/batch\n", "Epoch: 18/20... Training Step: 7973... Training loss: 0.9345... 0.1178 sec/batch\n", "Epoch: 18/20... Training Step: 7974... Training loss: 1.0905... 0.1176 sec/batch\n", "Epoch: 18/20... Training Step: 7975... Training loss: 0.8692... 0.1110 sec/batch\n", "Epoch: 18/20... Training Step: 7976... Training loss: 1.1108... 0.1171 sec/batch\n", "Epoch: 18/20... Training Step: 7977... Training loss: 1.0913... 0.1193 sec/batch\n", "Epoch: 18/20... Training Step: 7978... Training loss: 0.8404... 0.1153 sec/batch\n", "Epoch: 18/20... Training Step: 7979... Training loss: 1.0650... 0.1161 sec/batch\n", "Epoch: 18/20... Training Step: 7980... Training loss: 1.1844... 0.1158 sec/batch\n", "Epoch: 18/20... Training Step: 7981... Training loss: 0.8515... 0.1180 sec/batch\n", "Epoch: 18/20... Training Step: 7982... Training loss: 1.1220... 0.1168 sec/batch\n", "Epoch: 18/20... Training Step: 7983... Training loss: 0.9597... 0.1211 sec/batch\n", "Epoch: 18/20... Training Step: 7984... Training loss: 0.9243... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 7985... Training loss: 1.1161... 0.1161 sec/batch\n", "Epoch: 18/20... Training Step: 7986... Training loss: 1.1634... 0.1167 sec/batch\n", "Epoch: 18/20... Training Step: 7987... Training loss: 0.9546... 0.1217 sec/batch\n", "Epoch: 18/20... Training Step: 7988... 
Training loss: 0.9782... 0.1207 sec/batch\n", "Epoch: 18/20... Training Step: 7989... Training loss: 1.0073... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 7990... Training loss: 1.0732... 0.1195 sec/batch\n", "Epoch: 18/20... Training Step: 7991... Training loss: 1.0252... 0.1204 sec/batch\n", "Epoch: 18/20... Training Step: 7992... Training loss: 0.9697... 0.1182 sec/batch\n", "Epoch: 18/20... Training Step: 7993... Training loss: 1.1179... 0.1156 sec/batch\n", "Epoch: 18/20... Training Step: 7994... Training loss: 1.0784... 0.1167 sec/batch\n", "Epoch: 18/20... Training Step: 7995... Training loss: 0.8086... 0.1202 sec/batch\n", "Epoch: 18/20... Training Step: 7996... Training loss: 1.0706... 0.1162 sec/batch\n", "Epoch: 18/20... Training Step: 7997... Training loss: 0.8760... 0.1195 sec/batch\n", "Epoch: 18/20... Training Step: 7998... Training loss: 0.9013... 0.1172 sec/batch\n", "Epoch: 18/20... Training Step: 7999... Training loss: 0.9819... 0.1173 sec/batch\n", "Epoch: 18/20... Training Step: 8000... Training loss: 0.8261... 0.1156 sec/batch\n", "Epoch: 18/20... Training Step: 8001... Training loss: 1.0421... 0.1286 sec/batch\n", "Epoch: 18/20... Training Step: 8002... Training loss: 0.9984... 0.1220 sec/batch\n", "Epoch: 18/20... Training Step: 8003... Training loss: 0.9274... 0.1222 sec/batch\n", "Epoch: 18/20... Training Step: 8004... Training loss: 0.7786... 0.1184 sec/batch\n", "Epoch: 18/20... Training Step: 8005... Training loss: 0.9699... 0.1242 sec/batch\n", "Epoch: 18/20... Training Step: 8006... Training loss: 0.8397... 0.1165 sec/batch\n", "Epoch: 18/20... Training Step: 8007... Training loss: 0.9524... 0.1263 sec/batch\n", "Epoch: 18/20... Training Step: 8008... Training loss: 0.8017... 0.1329 sec/batch\n", "Epoch: 18/20... Training Step: 8009... Training loss: 0.9855... 0.1382 sec/batch\n", "Epoch: 18/20... Training Step: 8010... Training loss: 0.9705... 0.1201 sec/batch\n", "Epoch: 18/20... Training Step: 8011... 
Training loss: 0.9152... 0.1277 sec/batch\n", "Epoch: 18/20... Training Step: 8012... Training loss: 0.9492... 0.1275 sec/batch\n", "Epoch: 18/20... Training Step: 8013... Training loss: 1.0271... 0.1276 sec/batch\n", "Epoch: 18/20... Training Step: 8014... Training loss: 0.9085... 0.1257 sec/batch\n", "Epoch: 18/20... Training Step: 8015... Training loss: 0.9085... 0.1275 sec/batch\n", "Epoch: 18/20... Training Step: 8016... Training loss: 1.0138... 0.1252 sec/batch\n", "Epoch: 18/20... Training Step: 8017... Training loss: 0.8808... 0.1231 sec/batch\n", "Epoch: 18/20... Training Step: 8018... Training loss: 0.9200... 0.1210 sec/batch\n", "Epoch: 18/20... Training Step: 8019... Training loss: 0.9594... 0.1178 sec/batch\n", "Epoch: 18/20... Training Step: 8020... Training loss: 0.9125... 0.1142 sec/batch\n", "Epoch: 18/20... Training Step: 8021... Training loss: 0.8698... 0.1255 sec/batch\n", "Epoch: 18/20... Training Step: 8022... Training loss: 1.1484... 0.1260 sec/batch\n", "Epoch: 18/20... Training Step: 8023... Training loss: 0.9393... 0.1245 sec/batch\n", "Epoch: 18/20... Training Step: 8024... Training loss: 0.8751... 0.1236 sec/batch\n", "Epoch: 18/20... Training Step: 8025... Training loss: 0.8713... 0.1197 sec/batch\n", "Epoch: 18/20... Training Step: 8026... Training loss: 1.1181... 0.1244 sec/batch\n", "Epoch: 18/20... Training Step: 8027... Training loss: 0.9432... 0.1203 sec/batch\n", "Epoch: 18/20... Training Step: 8028... Training loss: 0.8842... 0.1161 sec/batch\n", "Epoch: 18/20... Training Step: 8029... Training loss: 0.8274... 0.1273 sec/batch\n", "Epoch: 18/20... Training Step: 8030... Training loss: 0.8489... 0.1278 sec/batch\n", "Epoch: 18/20... Training Step: 8031... Training loss: 0.8770... 0.1328 sec/batch\n", "Epoch: 18/20... Training Step: 8032... Training loss: 1.0657... 0.1236 sec/batch\n", "Epoch: 18/20... Training Step: 8033... Training loss: 0.8777... 0.1286 sec/batch\n", "Epoch: 18/20... Training Step: 8034... 
Training loss: 0.9082... 0.1343 sec/batch\n", "Epoch: 18/20... Training Step: 8035... Training loss: 0.8404... 0.1273 sec/batch\n", "Epoch: 18/20... Training Step: 8036... Training loss: 0.8007... 0.1344 sec/batch\n", "Epoch: 18/20... Training Step: 8037... Training loss: 0.7997... 0.1369 sec/batch\n", "Epoch: 18/20... Training Step: 8038... Training loss: 0.9636... 0.1354 sec/batch\n", "Epoch: 18/20... Training Step: 8039... Training loss: 0.8807... 0.1296 sec/batch\n", "Epoch: 18/20... Training Step: 8040... Training loss: 0.9865... 0.1283 sec/batch\n", "Epoch: 18/20... Training Step: 8041... Training loss: 1.1032... 0.1255 sec/batch\n", "Epoch: 18/20... Training Step: 8042... Training loss: 1.0164... 0.1228 sec/batch\n", "Epoch: 18/20... Training Step: 8043... Training loss: 0.8208... 0.1244 sec/batch\n", "Epoch: 18/20... Training Step: 8044... Training loss: 0.8725... 0.1214 sec/batch\n", "Epoch: 18/20... Training Step: 8045... Training loss: 0.9739... 0.1248 sec/batch\n", "Epoch: 18/20... Training Step: 8046... Training loss: 0.8841... 0.1254 sec/batch\n", "Epoch: 18/20... Training Step: 8047... Training loss: 0.7506... 0.1231 sec/batch\n", "Epoch: 18/20... Training Step: 8048... Training loss: 0.9009... 0.1174 sec/batch\n", "Epoch: 18/20... Training Step: 8049... Training loss: 1.0109... 0.1143 sec/batch\n", "Epoch: 18/20... Training Step: 8050... Training loss: 0.8988... 0.1162 sec/batch\n", "Epoch: 18/20... Training Step: 8051... Training loss: 1.0202... 0.1162 sec/batch\n", "Epoch: 18/20... Training Step: 8052... Training loss: 0.8769... 0.1193 sec/batch\n", "Epoch: 18/20... Training Step: 8053... Training loss: 0.9770... 0.1174 sec/batch\n", "Epoch: 18/20... Training Step: 8054... Training loss: 0.9409... 0.1143 sec/batch\n", "Epoch: 18/20... Training Step: 8055... Training loss: 0.7155... 0.1210 sec/batch\n", "Epoch: 18/20... Training Step: 8056... Training loss: 1.0102... 0.1190 sec/batch\n", "Epoch: 18/20... Training Step: 8057... 
Training loss: 0.7779... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 8058... Training loss: 0.8849... 0.1157 sec/batch\n", "Epoch: 18/20... Training Step: 8059... Training loss: 1.0222... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 8060... Training loss: 0.9792... 0.1152 sec/batch\n", "Epoch: 18/20... Training Step: 8061... Training loss: 1.0303... 0.1288 sec/batch\n", "Epoch: 18/20... Training Step: 8062... Training loss: 0.9550... 0.1234 sec/batch\n", "Epoch: 18/20... Training Step: 8063... Training loss: 1.1485... 0.1168 sec/batch\n", "Epoch: 18/20... Training Step: 8064... Training loss: 0.8554... 0.1162 sec/batch\n", "Epoch: 18/20... Training Step: 8065... Training loss: 0.8532... 0.1183 sec/batch\n", "Epoch: 18/20... Training Step: 8066... Training loss: 1.0216... 0.1193 sec/batch\n", "Epoch: 18/20... Training Step: 8067... Training loss: 0.8422... 0.1214 sec/batch\n", "Epoch: 18/20... Training Step: 8068... Training loss: 0.9236... 0.1156 sec/batch\n", "Epoch: 18/20... Training Step: 8069... Training loss: 0.7626... 0.1207 sec/batch\n", "Epoch: 18/20... Training Step: 8070... Training loss: 1.0368... 0.1165 sec/batch\n", "Epoch: 18/20... Training Step: 8071... Training loss: 1.0480... 0.1140 sec/batch\n", "Epoch: 18/20... Training Step: 8072... Training loss: 0.9790... 0.1197 sec/batch\n", "Epoch: 18/20... Training Step: 8073... Training loss: 1.1193... 0.1159 sec/batch\n", "Epoch: 18/20... Training Step: 8074... Training loss: 1.0596... 0.1234 sec/batch\n", "Epoch: 18/20... Training Step: 8075... Training loss: 1.0123... 0.1168 sec/batch\n", "Epoch: 18/20... Training Step: 8076... Training loss: 0.8581... 0.1193 sec/batch\n", "Epoch: 18/20... Training Step: 8077... Training loss: 1.0114... 0.1220 sec/batch\n", "Epoch: 18/20... Training Step: 8078... Training loss: 0.8254... 0.1176 sec/batch\n", "Epoch: 18/20... Training Step: 8079... Training loss: 0.8717... 0.1170 sec/batch\n", "Epoch: 18/20... Training Step: 8080... 
Training loss: 0.8136... 0.1144 sec/batch\n", "Epoch: 18/20... Training Step: 8081... Training loss: 0.8587... 0.1256 sec/batch\n", "Epoch: 18/20... Training Step: 8082... Training loss: 0.9035... 0.1208 sec/batch\n", "Epoch: 18/20... Training Step: 8083... Training loss: 1.0676... 0.1183 sec/batch\n", "Epoch: 18/20... Training Step: 8084... Training loss: 1.0160... 0.1162 sec/batch\n", "Epoch: 18/20... Training Step: 8085... Training loss: 0.8906... 0.1157 sec/batch\n", "Epoch: 18/20... Training Step: 8086... Training loss: 0.9471... 0.1129 sec/batch\n", "Epoch: 18/20... Training Step: 8087... Training loss: 0.8009... 0.1164 sec/batch\n", "Epoch: 18/20... Training Step: 8088... Training loss: 0.9353... 0.1190 sec/batch\n", "Epoch: 18/20... Training Step: 8089... Training loss: 0.9493... 0.1217 sec/batch\n", "Epoch: 18/20... Training Step: 8090... Training loss: 1.1290... 0.1135 sec/batch\n", "Epoch: 18/20... Training Step: 8091... Training loss: 0.8424... 0.1187 sec/batch\n", "Epoch: 18/20... Training Step: 8092... Training loss: 1.1078... 0.1188 sec/batch\n", "Epoch: 18/20... Training Step: 8093... Training loss: 0.9812... 0.1204 sec/batch\n", "Epoch: 18/20... Training Step: 8094... Training loss: 1.0555... 0.1181 sec/batch\n", "Epoch: 18/20... Training Step: 8095... Training loss: 0.8589... 0.1164 sec/batch\n", "Epoch: 18/20... Training Step: 8096... Training loss: 0.9411... 0.1181 sec/batch\n", "Epoch: 18/20... Training Step: 8097... Training loss: 0.9387... 0.1205 sec/batch\n", "Epoch: 18/20... Training Step: 8098... Training loss: 0.7482... 0.1185 sec/batch\n", "Epoch: 18/20... Training Step: 8099... Training loss: 0.9148... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 8100... Training loss: 1.0871... 0.1220 sec/batch\n", "Epoch: 18/20... Training Step: 8101... Training loss: 0.9539... 0.1185 sec/batch\n", "Epoch: 18/20... Training Step: 8102... Training loss: 0.8992... 0.1165 sec/batch\n", "Epoch: 18/20... Training Step: 8103... 
Training loss: 1.0452... 0.1169 sec/batch\n", "Epoch: 18/20... Training Step: 8104... Training loss: 0.8627... 0.1185 sec/batch\n", "Epoch: 18/20... Training Step: 8105... Training loss: 0.9289... 0.1166 sec/batch\n", "Epoch: 18/20... Training Step: 8106... Training loss: 1.0311... 0.1199 sec/batch\n", "Epoch: 18/20... Training Step: 8107... Training loss: 1.0387... 0.1166 sec/batch\n", "Epoch: 18/20... Training Step: 8108... Training loss: 0.8090... 0.1214 sec/batch\n", "Epoch: 18/20... Training Step: 8109... Training loss: 0.9531... 0.1181 sec/batch\n", "Epoch: 18/20... Training Step: 8110... Training loss: 1.2152... 0.1172 sec/batch\n", "Epoch: 18/20... Training Step: 8111... Training loss: 1.0350... 0.1184 sec/batch\n", "Epoch: 18/20... Training Step: 8112... Training loss: 1.1994... 0.1176 sec/batch\n", "Epoch: 18/20... Training Step: 8113... Training loss: 0.9079... 0.1210 sec/batch\n", "Epoch: 18/20... Training Step: 8114... Training loss: 1.1227... 0.1218 sec/batch\n", "Epoch: 18/20... Training Step: 8115... Training loss: 1.0919... 0.1216 sec/batch\n", "Epoch: 18/20... Training Step: 8116... Training loss: 0.9687... 0.1152 sec/batch\n", "Epoch: 18/20... Training Step: 8117... Training loss: 1.0246... 0.1171 sec/batch\n", "Epoch: 18/20... Training Step: 8118... Training loss: 0.8806... 0.1178 sec/batch\n", "Epoch: 18/20... Training Step: 8119... Training loss: 0.9861... 0.1166 sec/batch\n", "Epoch: 18/20... Training Step: 8120... Training loss: 1.0741... 0.1104 sec/batch\n", "Epoch: 18/20... Training Step: 8121... Training loss: 1.1239... 0.1171 sec/batch\n", "Epoch: 18/20... Training Step: 8122... Training loss: 0.9833... 0.1200 sec/batch\n", "Epoch: 18/20... Training Step: 8123... Training loss: 1.1755... 0.1201 sec/batch\n", "Epoch: 18/20... Training Step: 8124... Training loss: 1.0677... 0.1159 sec/batch\n", "Epoch: 18/20... Training Step: 8125... Training loss: 0.9615... 0.1155 sec/batch\n", "Epoch: 18/20... Training Step: 8126... 
Training loss: 0.9244... 0.1134 sec/batch\n", "Epoch: 18/20... Training Step: 8127... Training loss: 0.9304... 0.1137 sec/batch\n", "Epoch: 18/20... Training Step: 8128... Training loss: 1.0288... 0.1139 sec/batch\n", "Epoch: 18/20... Training Step: 8129... Training loss: 0.8746... 0.1203 sec/batch\n", "Epoch: 18/20... Training Step: 8130... Training loss: 0.8947... 0.1183 sec/batch\n", "Epoch: 18/20... Training Step: 8131... Training loss: 1.1132... 0.1164 sec/batch\n", "Epoch: 18/20... Training Step: 8132... Training loss: 0.8238... 0.1209 sec/batch\n", "Epoch: 18/20... Training Step: 8133... Training loss: 0.9429... 0.1167 sec/batch\n", "Epoch: 18/20... Training Step: 8134... Training loss: 0.8887... 0.1181 sec/batch\n", "Epoch: 18/20... Training Step: 8135... Training loss: 0.8454... 0.1147 sec/batch\n", "Epoch: 18/20... Training Step: 8136... Training loss: 1.0062... 0.1193 sec/batch\n", "Epoch: 18/20... Training Step: 8137... Training loss: 0.8936... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 8138... Training loss: 0.8471... 0.1188 sec/batch\n", "Epoch: 18/20... Training Step: 8139... Training loss: 0.8942... 0.1201 sec/batch\n", "Epoch: 18/20... Training Step: 8140... Training loss: 1.0764... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 8141... Training loss: 0.9471... 0.1139 sec/batch\n", "Epoch: 18/20... Training Step: 8142... Training loss: 0.8576... 0.1212 sec/batch\n", "Epoch: 18/20... Training Step: 8143... Training loss: 0.8660... 0.1156 sec/batch\n", "Epoch: 18/20... Training Step: 8144... Training loss: 0.9709... 0.1229 sec/batch\n", "Epoch: 18/20... Training Step: 8145... Training loss: 1.1029... 0.1199 sec/batch\n", "Epoch: 18/20... Training Step: 8146... Training loss: 0.9685... 0.1188 sec/batch\n", "Epoch: 18/20... Training Step: 8147... Training loss: 0.7179... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 8148... Training loss: 0.8535... 0.1224 sec/batch\n", "Epoch: 18/20... Training Step: 8149... 
Training loss: 0.9524... 0.1176 sec/batch\n", "Epoch: 18/20... Training Step: 8150... Training loss: 1.0426... 0.1134 sec/batch\n", "Epoch: 18/20... Training Step: 8151... Training loss: 0.9237... 0.1166 sec/batch\n", "Epoch: 18/20... Training Step: 8152... Training loss: 1.0667... 0.1184 sec/batch\n", "Epoch: 18/20... Training Step: 8153... Training loss: 1.0212... 0.1316 sec/batch\n", "Epoch: 18/20... Training Step: 8154... Training loss: 1.0083... 0.1202 sec/batch\n", "Epoch: 18/20... Training Step: 8155... Training loss: 1.0856... 0.1129 sec/batch\n", "Epoch: 18/20... Training Step: 8156... Training loss: 1.1464... 0.1154 sec/batch\n", "Epoch: 18/20... Training Step: 8157... Training loss: 0.9247... 0.1188 sec/batch\n", "Epoch: 18/20... Training Step: 8158... Training loss: 0.9509... 0.1174 sec/batch\n", "Epoch: 18/20... Training Step: 8159... Training loss: 0.9575... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 8160... Training loss: 1.0754... 0.1189 sec/batch\n", "Epoch: 18/20... Training Step: 8161... Training loss: 1.1622... 0.1166 sec/batch\n", "Epoch: 18/20... Training Step: 8162... Training loss: 1.0672... 0.1236 sec/batch\n", "Epoch: 18/20... Training Step: 8163... Training loss: 0.9871... 0.1174 sec/batch\n", "Epoch: 18/20... Training Step: 8164... Training loss: 0.8056... 0.1185 sec/batch\n", "Epoch: 18/20... Training Step: 8165... Training loss: 0.8612... 0.1228 sec/batch\n", "Epoch: 18/20... Training Step: 8166... Training loss: 1.1043... 0.1214 sec/batch\n", "Epoch: 18/20... Training Step: 8167... Training loss: 1.0337... 0.1284 sec/batch\n", "Epoch: 18/20... Training Step: 8168... Training loss: 1.0249... 0.1330 sec/batch\n", "Epoch: 18/20... Training Step: 8169... Training loss: 0.9251... 0.1286 sec/batch\n", "Epoch: 18/20... Training Step: 8170... Training loss: 0.9611... 0.1203 sec/batch\n", "Epoch: 18/20... Training Step: 8171... Training loss: 1.0260... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 8172... 
Training loss: 0.9884... 0.1249 sec/batch\n", "Epoch: 18/20... Training Step: 8173... Training loss: 0.9052... 0.1246 sec/batch\n", "Epoch: 18/20... Training Step: 8174... Training loss: 0.9370... 0.1199 sec/batch\n", "Epoch: 18/20... Training Step: 8175... Training loss: 1.0528... 0.1198 sec/batch\n", "Epoch: 18/20... Training Step: 8176... Training loss: 0.9738... 0.1207 sec/batch\n", "Epoch: 18/20... Training Step: 8177... Training loss: 0.9602... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 8178... Training loss: 0.9295... 0.1170 sec/batch\n", "Epoch: 18/20... Training Step: 8179... Training loss: 0.9293... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 8180... Training loss: 0.8706... 0.1196 sec/batch\n", "Epoch: 18/20... Training Step: 8181... Training loss: 0.8723... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 8182... Training loss: 1.1081... 0.1460 sec/batch\n", "Epoch: 18/20... Training Step: 8183... Training loss: 0.9854... 0.1202 sec/batch\n", "Epoch: 18/20... Training Step: 8184... Training loss: 1.1135... 0.1171 sec/batch\n", "Epoch: 18/20... Training Step: 8185... Training loss: 0.8220... 0.1162 sec/batch\n", "Epoch: 18/20... Training Step: 8186... Training loss: 0.8876... 0.1209 sec/batch\n", "Epoch: 18/20... Training Step: 8187... Training loss: 0.9418... 0.1154 sec/batch\n", "Epoch: 18/20... Training Step: 8188... Training loss: 0.9915... 0.1153 sec/batch\n", "Epoch: 18/20... Training Step: 8189... Training loss: 1.0160... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 8190... Training loss: 0.9817... 0.1191 sec/batch\n", "Epoch: 18/20... Training Step: 8191... Training loss: 0.9962... 0.1229 sec/batch\n", "Epoch: 18/20... Training Step: 8192... Training loss: 1.2310... 0.1313 sec/batch\n", "Epoch: 18/20... Training Step: 8193... Training loss: 0.9498... 0.1287 sec/batch\n", "Epoch: 18/20... Training Step: 8194... Training loss: 1.0884... 0.1335 sec/batch\n", "Epoch: 18/20... Training Step: 8195... 
Training loss: 1.0174... 0.1225 sec/batch\n", "Epoch: 18/20... Training Step: 8196... Training loss: 1.2694... 0.1305 sec/batch\n", "Epoch: 18/20... Training Step: 8197... Training loss: 1.0784... 0.1250 sec/batch\n", "Epoch: 18/20... Training Step: 8198... Training loss: 1.0894... 0.1146 sec/batch\n", "Epoch: 18/20... Training Step: 8199... Training loss: 1.0071... 0.1240 sec/batch\n", "Epoch: 18/20... Training Step: 8200... Training loss: 0.7382... 0.1158 sec/batch\n", "Epoch: 18/20... Training Step: 8201... Training loss: 0.8460... 0.1170 sec/batch\n", "Epoch: 18/20... Training Step: 8202... Training loss: 0.9063... 0.1251 sec/batch\n", "Epoch: 18/20... Training Step: 8203... Training loss: 0.8354... 0.1271 sec/batch\n", "Epoch: 18/20... Training Step: 8204... Training loss: 0.7939... 0.1262 sec/batch\n", "Epoch: 18/20... Training Step: 8205... Training loss: 0.7599... 0.1243 sec/batch\n", "Epoch: 18/20... Training Step: 8206... Training loss: 0.9393... 0.1318 sec/batch\n", "Epoch: 18/20... Training Step: 8207... Training loss: 0.9621... 0.1306 sec/batch\n", "Epoch: 18/20... Training Step: 8208... Training loss: 0.9248... 0.1178 sec/batch\n", "Epoch: 18/20... Training Step: 8209... Training loss: 0.8865... 0.1153 sec/batch\n", "Epoch: 18/20... Training Step: 8210... Training loss: 1.1410... 0.1196 sec/batch\n", "Epoch: 18/20... Training Step: 8211... Training loss: 0.9348... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 8212... Training loss: 1.0744... 0.1181 sec/batch\n", "Epoch: 18/20... Training Step: 8213... Training loss: 0.8063... 0.1173 sec/batch\n", "Epoch: 18/20... Training Step: 8214... Training loss: 0.7476... 0.1127 sec/batch\n", "Epoch: 18/20... Training Step: 8215... Training loss: 0.9746... 0.1206 sec/batch\n", "Epoch: 18/20... Training Step: 8216... Training loss: 0.8606... 0.1176 sec/batch\n", "Epoch: 18/20... Training Step: 8217... Training loss: 0.8909... 0.1195 sec/batch\n", "Epoch: 18/20... Training Step: 8218... 
Training loss: 0.8404... 0.1193 sec/batch\n", "Epoch: 18/20... Training Step: 8219... Training loss: 1.0188... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 8220... Training loss: 0.9484... 0.1185 sec/batch\n", "Epoch: 18/20... Training Step: 8221... Training loss: 0.8516... 0.1215 sec/batch\n", "Epoch: 18/20... Training Step: 8222... Training loss: 0.9533... 0.1159 sec/batch\n", "Epoch: 18/20... Training Step: 8223... Training loss: 0.9483... 0.1218 sec/batch\n", "Epoch: 18/20... Training Step: 8224... Training loss: 0.9496... 0.1174 sec/batch\n", "Epoch: 18/20... Training Step: 8225... Training loss: 0.7561... 0.1186 sec/batch\n", "Epoch: 18/20... Training Step: 8226... Training loss: 0.8701... 0.1182 sec/batch\n", "Epoch: 18/20... Training Step: 8227... Training loss: 0.9415... 0.1168 sec/batch\n", "Epoch: 18/20... Training Step: 8228... Training loss: 0.8039... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 8229... Training loss: 0.8570... 0.1218 sec/batch\n", "Epoch: 18/20... Training Step: 8230... Training loss: 0.9960... 0.1165 sec/batch\n", "Epoch: 18/20... Training Step: 8231... Training loss: 0.8151... 0.1149 sec/batch\n", "Epoch: 18/20... Training Step: 8232... Training loss: 0.9258... 0.1205 sec/batch\n", "Epoch: 18/20... Training Step: 8233... Training loss: 0.8916... 0.1256 sec/batch\n", "Epoch: 18/20... Training Step: 8234... Training loss: 0.9030... 0.1265 sec/batch\n", "Epoch: 18/20... Training Step: 8235... Training loss: 0.8692... 0.1291 sec/batch\n", "Epoch: 18/20... Training Step: 8236... Training loss: 1.1076... 0.1228 sec/batch\n", "Epoch: 18/20... Training Step: 8237... Training loss: 0.9123... 0.1229 sec/batch\n", "Epoch: 18/20... Training Step: 8238... Training loss: 0.9279... 0.1329 sec/batch\n", "Epoch: 18/20... Training Step: 8239... Training loss: 0.9361... 0.1231 sec/batch\n", "Epoch: 18/20... Training Step: 8240... Training loss: 0.8841... 0.1274 sec/batch\n", "Epoch: 18/20... Training Step: 8241... 
Training loss: 0.8461... 0.1373 sec/batch\n", "Epoch: 18/20... Training Step: 8242... Training loss: 0.6147... 0.1312 sec/batch\n", "Epoch: 18/20... Training Step: 8243... Training loss: 0.8658... 0.1299 sec/batch\n", "Epoch: 18/20... Training Step: 8244... Training loss: 0.8986... 0.1278 sec/batch\n", "Epoch: 18/20... Training Step: 8245... Training loss: 0.8657... 0.1251 sec/batch\n", "Epoch: 18/20... Training Step: 8246... Training loss: 1.0087... 0.1306 sec/batch\n", "Epoch: 18/20... Training Step: 8247... Training loss: 1.1485... 0.1229 sec/batch\n", "Epoch: 18/20... Training Step: 8248... Training loss: 0.8012... 0.1225 sec/batch\n", "Epoch: 18/20... Training Step: 8249... Training loss: 0.9908... 0.1217 sec/batch\n", "Epoch: 18/20... Training Step: 8250... Training loss: 0.8224... 0.1212 sec/batch\n", "Epoch: 18/20... Training Step: 8251... Training loss: 0.7865... 0.1271 sec/batch\n", "Epoch: 18/20... Training Step: 8252... Training loss: 1.0453... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 8253... Training loss: 0.9370... 0.1296 sec/batch\n", "Epoch: 18/20... Training Step: 8254... Training loss: 0.9823... 0.1238 sec/batch\n", "Epoch: 18/20... Training Step: 8255... Training loss: 0.9448... 0.1307 sec/batch\n", "Epoch: 18/20... Training Step: 8256... Training loss: 1.0134... 0.1250 sec/batch\n", "Epoch: 18/20... Training Step: 8257... Training loss: 1.1106... 0.1300 sec/batch\n", "Epoch: 18/20... Training Step: 8258... Training loss: 1.0409... 0.1275 sec/batch\n", "Epoch: 18/20... Training Step: 8259... Training loss: 0.8549... 0.1173 sec/batch\n", "Epoch: 18/20... Training Step: 8260... Training loss: 0.9710... 0.1241 sec/batch\n", "Epoch: 18/20... Training Step: 8261... Training loss: 0.7809... 0.1309 sec/batch\n", "Epoch: 18/20... Training Step: 8262... Training loss: 0.8501... 0.1220 sec/batch\n", "Epoch: 18/20... Training Step: 8263... Training loss: 0.8758... 0.1296 sec/batch\n", "Epoch: 18/20... Training Step: 8264... 
Training loss: 0.9564... 0.1180 sec/batch\n", "Epoch: 18/20... Training Step: 8265... Training loss: 0.9971... 0.1228 sec/batch\n", "Epoch: 18/20... Training Step: 8266... Training loss: 1.0631... 0.1323 sec/batch\n", "Epoch: 18/20... Training Step: 8267... Training loss: 0.9565... 0.1234 sec/batch\n", "Epoch: 18/20... Training Step: 8268... Training loss: 0.8936... 0.1190 sec/batch\n", "Epoch: 18/20... Training Step: 8269... Training loss: 0.8845... 0.1210 sec/batch\n", "Epoch: 18/20... Training Step: 8270... Training loss: 0.9486... 0.1250 sec/batch\n", "Epoch: 18/20... Training Step: 8271... Training loss: 0.9039... 0.1321 sec/batch\n", "Epoch: 18/20... Training Step: 8272... Training loss: 0.9601... 0.1383 sec/batch\n", "Epoch: 18/20... Training Step: 8273... Training loss: 0.9400... 0.1337 sec/batch\n", "Epoch: 18/20... Training Step: 8274... Training loss: 0.7964... 0.1159 sec/batch\n", "Epoch: 18/20... Training Step: 8275... Training loss: 0.9777... 0.1472 sec/batch\n", "Epoch: 18/20... Training Step: 8276... Training loss: 0.8723... 0.1259 sec/batch\n", "Epoch: 18/20... Training Step: 8277... Training loss: 0.8566... 0.1253 sec/batch\n", "Epoch: 18/20... Training Step: 8278... Training loss: 0.9050... 0.1234 sec/batch\n", "Epoch: 18/20... Training Step: 8279... Training loss: 0.7644... 0.1182 sec/batch\n", "Epoch: 18/20... Training Step: 8280... Training loss: 0.8686... 0.1250 sec/batch\n", "Epoch: 18/20... Training Step: 8281... Training loss: 0.9575... 0.1222 sec/batch\n", "Epoch: 18/20... Training Step: 8282... Training loss: 0.8188... 0.1210 sec/batch\n", "Epoch: 18/20... Training Step: 8283... Training loss: 0.9057... 0.1293 sec/batch\n", "Epoch: 18/20... Training Step: 8284... Training loss: 0.9609... 0.1260 sec/batch\n", "Epoch: 18/20... Training Step: 8285... Training loss: 0.9091... 0.1128 sec/batch\n", "Epoch: 18/20... Training Step: 8286... Training loss: 1.0026... 0.1135 sec/batch\n", "Epoch: 18/20... Training Step: 8287... 
Training loss: 1.0357... 0.1245 sec/batch\n", "Epoch: 18/20... Training Step: 8288... Training loss: 1.0003... 0.1289 sec/batch\n", "Epoch: 18/20... Training Step: 8289... Training loss: 0.8817... 0.1226 sec/batch\n", "Epoch: 18/20... Training Step: 8290... Training loss: 0.9983... 0.1162 sec/batch\n", "Epoch: 18/20... Training Step: 8291... Training loss: 0.8399... 0.1175 sec/batch\n", "Epoch: 18/20... Training Step: 8292... Training loss: 0.9412... 0.1177 sec/batch\n", "Epoch: 18/20... Training Step: 8293... Training loss: 1.1047... 0.1290 sec/batch\n", "Epoch: 18/20... Training Step: 8294... Training loss: 1.0386... 0.1183 sec/batch\n", "Epoch: 18/20... Training Step: 8295... Training loss: 0.8983... 0.1179 sec/batch\n", "Epoch: 18/20... Training Step: 8296... Training loss: 1.1281... 0.1164 sec/batch\n", "Epoch: 18/20... Training Step: 8297... Training loss: 0.9074... 0.1155 sec/batch\n", "Epoch: 18/20... Training Step: 8298... Training loss: 0.8986... 0.1204 sec/batch\n", "Epoch: 18/20... Training Step: 8299... Training loss: 0.8398... 0.1152 sec/batch\n", "Epoch: 18/20... Training Step: 8300... Training loss: 0.8613... 0.1245 sec/batch\n", "Epoch: 18/20... Training Step: 8301... Training loss: 0.9387... 0.1234 sec/batch\n", "Epoch: 18/20... Training Step: 8302... Training loss: 0.9664... 0.1222 sec/batch\n", "Epoch: 18/20... Training Step: 8303... Training loss: 1.0031... 0.1224 sec/batch\n", "Epoch: 18/20... Training Step: 8304... Training loss: 0.9805... 0.1257 sec/batch\n", "Epoch: 18/20... Training Step: 8305... Training loss: 0.8408... 0.1226 sec/batch\n", "Epoch: 18/20... Training Step: 8306... Training loss: 0.7967... 0.1292 sec/batch\n", "Epoch: 18/20... Training Step: 8307... Training loss: 0.7992... 0.1304 sec/batch\n", "Epoch: 18/20... Training Step: 8308... Training loss: 0.8665... 0.1325 sec/batch\n", "Epoch: 18/20... Training Step: 8309... Training loss: 1.0707... 0.1248 sec/batch\n", "Epoch: 18/20... Training Step: 8310... 
Training loss: 0.8937... 0.1171 sec/batch\n", "Epoch: 18/20... Training Step: 8311... Training loss: 0.8534... 0.1227 sec/batch\n", "Epoch: 18/20... Training Step: 8312... Training loss: 0.7684... 0.1243 sec/batch\n", "Epoch: 18/20... Training Step: 8313... Training loss: 0.8224... 0.1245 sec/batch\n", "Epoch: 18/20... Training Step: 8314... Training loss: 0.8700... 0.1200 sec/batch\n", "Epoch: 18/20... Training Step: 8315... Training loss: 0.8424... 0.1270 sec/batch\n", "Epoch: 18/20... Training Step: 8316... Training loss: 0.9440... 0.1265 sec/batch\n", "Epoch: 18/20... Training Step: 8317... Training loss: 0.8613... 0.1299 sec/batch\n", "Epoch: 18/20... Training Step: 8318... Training loss: 0.8361... 0.1295 sec/batch\n", "Epoch: 18/20... Training Step: 8319... Training loss: 1.1101... 0.1287 sec/batch\n", "Epoch: 18/20... Training Step: 8320... Training loss: 0.8756... 0.1187 sec/batch\n", "Epoch: 18/20... Training Step: 8321... Training loss: 1.0962... 0.1199 sec/batch\n", "Epoch: 18/20... Training Step: 8322... Training loss: 1.0412... 0.1233 sec/batch\n", "Epoch: 18/20... Training Step: 8323... Training loss: 0.8265... 0.1280 sec/batch\n", "Epoch: 18/20... Training Step: 8324... Training loss: 0.9549... 0.1273 sec/batch\n", "Epoch: 18/20... Training Step: 8325... Training loss: 0.9535... 0.1380 sec/batch\n", "Epoch: 18/20... Training Step: 8326... Training loss: 1.0093... 0.1380 sec/batch\n", "Epoch: 18/20... Training Step: 8327... Training loss: 1.0489... 0.1301 sec/batch\n", "Epoch: 18/20... Training Step: 8328... Training loss: 1.0168... 0.1279 sec/batch\n", "Epoch: 18/20... Training Step: 8329... Training loss: 0.8217... 0.1333 sec/batch\n", "Epoch: 18/20... Training Step: 8330... Training loss: 0.8971... 0.1275 sec/batch\n", "Epoch: 18/20... Training Step: 8331... Training loss: 0.8663... 0.1273 sec/batch\n", "Epoch: 18/20... Training Step: 8332... Training loss: 0.9541... 0.1206 sec/batch\n", "Epoch: 18/20... Training Step: 8333... 
Training loss: 0.8503... 0.1332 sec/batch\n", "Epoch: 18/20... Training Step: 8334... Training loss: 0.9915... 0.1265 sec/batch\n", "Epoch: 18/20... Training Step: 8335... Training loss: 1.0506... 0.1230 sec/batch\n", "Epoch: 18/20... Training Step: 8336... Training loss: 0.8883... 0.1306 sec/batch\n", "Epoch: 18/20... Training Step: 8337... Training loss: 0.8749... 0.1255 sec/batch\n", "Epoch: 18/20... Training Step: 8338... Training loss: 0.9057... 0.1265 sec/batch\n", "Epoch: 18/20... Training Step: 8339... Training loss: 0.9189... 0.1322 sec/batch\n", "Epoch: 18/20... Training Step: 8340... Training loss: 0.9050... 0.1283 sec/batch\n", "Epoch: 18/20... Training Step: 8341... Training loss: 0.8074... 0.1338 sec/batch\n", "Epoch: 18/20... Training Step: 8342... Training loss: 1.0813... 0.1305 sec/batch\n", "Epoch: 18/20... Training Step: 8343... Training loss: 0.9666... 0.1312 sec/batch\n", "Epoch: 18/20... Training Step: 8344... Training loss: 0.7942... 0.1281 sec/batch\n", "Epoch: 18/20... Training Step: 8345... Training loss: 0.8730... 0.1282 sec/batch\n", "Epoch: 18/20... Training Step: 8346... Training loss: 0.8492... 0.1381 sec/batch\n", "Epoch: 18/20... Training Step: 8347... Training loss: 0.7918... 0.1263 sec/batch\n", "Epoch: 18/20... Training Step: 8348... Training loss: 0.8757... 0.1147 sec/batch\n", "Epoch: 18/20... Training Step: 8349... Training loss: 0.8952... 0.1152 sec/batch\n", "Epoch: 18/20... Training Step: 8350... Training loss: 0.8989... 0.1155 sec/batch\n", "Epoch: 18/20... Training Step: 8351... Training loss: 0.8481... 0.1227 sec/batch\n", "Epoch: 18/20... Training Step: 8352... Training loss: 1.0280... 0.1206 sec/batch\n", "Epoch: 19/20... Training Step: 8353... Training loss: 1.0587... 0.1220 sec/batch\n", "Epoch: 19/20... Training Step: 8354... Training loss: 1.0556... 0.1198 sec/batch\n", "Epoch: 19/20... Training Step: 8355... Training loss: 1.0438... 0.1250 sec/batch\n", "Epoch: 19/20... Training Step: 8356... 
Training loss: 0.9614... 0.1175 sec/batch\n", "Epoch: 19/20... Training Step: 8357... Training loss: 0.9439... 0.1158 sec/batch\n", "Epoch: 19/20... Training Step: 8358... Training loss: 0.9039... 0.1157 sec/batch\n", "Epoch: 19/20... Training Step: 8359... Training loss: 1.0787... 0.1212 sec/batch\n", "Epoch: 19/20... Training Step: 8360... Training loss: 0.8546... 0.1199 sec/batch\n", "Epoch: 19/20... Training Step: 8361... Training loss: 0.8039... 0.1147 sec/batch\n", "Epoch: 19/20... Training Step: 8362... Training loss: 0.9296... 0.1197 sec/batch\n", "Epoch: 19/20... Training Step: 8363... Training loss: 0.8431... 0.1162 sec/batch\n", "Epoch: 19/20... Training Step: 8364... Training loss: 0.8165... 0.1147 sec/batch\n", "Epoch: 19/20... Training Step: 8365... Training loss: 1.1344... 0.1172 sec/batch\n", "Epoch: 19/20... Training Step: 8366... Training loss: 0.8621... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8367... Training loss: 1.0189... 0.1140 sec/batch\n", "Epoch: 19/20... Training Step: 8368... Training loss: 1.0598... 0.1173 sec/batch\n", "Epoch: 19/20... Training Step: 8369... Training loss: 0.8167... 0.1173 sec/batch\n", "Epoch: 19/20... Training Step: 8370... Training loss: 0.9923... 0.1135 sec/batch\n", "Epoch: 19/20... Training Step: 8371... Training loss: 0.9444... 0.1170 sec/batch\n", "Epoch: 19/20... Training Step: 8372... Training loss: 0.9540... 0.1179 sec/batch\n", "Epoch: 19/20... Training Step: 8373... Training loss: 1.0692... 0.1165 sec/batch\n", "Epoch: 19/20... Training Step: 8374... Training loss: 0.8254... 0.1213 sec/batch\n", "Epoch: 19/20... Training Step: 8375... Training loss: 0.8837... 0.1226 sec/batch\n", "Epoch: 19/20... Training Step: 8376... Training loss: 0.8781... 0.1257 sec/batch\n", "Epoch: 19/20... Training Step: 8377... Training loss: 0.8781... 0.1273 sec/batch\n", "Epoch: 19/20... Training Step: 8378... Training loss: 0.9567... 0.1235 sec/batch\n", "Epoch: 19/20... Training Step: 8379... 
Training loss: 0.9714... 0.1223 sec/batch\n", "Epoch: 19/20... Training Step: 8380... Training loss: 0.9400... 0.1168 sec/batch\n", "Epoch: 19/20... Training Step: 8381... Training loss: 0.8993... 0.1221 sec/batch\n", "Epoch: 19/20... Training Step: 8382... Training loss: 0.9313... 0.1207 sec/batch\n", "Epoch: 19/20... Training Step: 8383... Training loss: 0.8792... 0.1210 sec/batch\n", "Epoch: 19/20... Training Step: 8384... Training loss: 0.9148... 0.1190 sec/batch\n", "Epoch: 19/20... Training Step: 8385... Training loss: 0.8202... 0.1182 sec/batch\n", "Epoch: 19/20... Training Step: 8386... Training loss: 0.7683... 0.1195 sec/batch\n", "Epoch: 19/20... Training Step: 8387... Training loss: 0.8810... 0.1163 sec/batch\n", "Epoch: 19/20... Training Step: 8388... Training loss: 0.8269... 0.1198 sec/batch\n", "Epoch: 19/20... Training Step: 8389... Training loss: 0.8952... 0.1222 sec/batch\n", "Epoch: 19/20... Training Step: 8390... Training loss: 0.9436... 0.1198 sec/batch\n", "Epoch: 19/20... Training Step: 8391... Training loss: 0.8302... 0.1229 sec/batch\n", "Epoch: 19/20... Training Step: 8392... Training loss: 1.1219... 0.1226 sec/batch\n", "Epoch: 19/20... Training Step: 8393... Training loss: 0.9437... 0.1233 sec/batch\n", "Epoch: 19/20... Training Step: 8394... Training loss: 0.7915... 0.1166 sec/batch\n", "Epoch: 19/20... Training Step: 8395... Training loss: 0.9694... 0.1148 sec/batch\n", "Epoch: 19/20... Training Step: 8396... Training loss: 0.8930... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8397... Training loss: 1.0146... 0.1205 sec/batch\n", "Epoch: 19/20... Training Step: 8398... Training loss: 1.0760... 0.1140 sec/batch\n", "Epoch: 19/20... Training Step: 8399... Training loss: 0.9584... 0.1209 sec/batch\n", "Epoch: 19/20... Training Step: 8400... Training loss: 0.9542... 0.1158 sec/batch\n", "Epoch: 19/20... Training Step: 8401... Training loss: 0.9357... 0.1244 sec/batch\n", "Epoch: 19/20... Training Step: 8402... 
Training loss: 0.7961... 0.1183 sec/batch\n", "Epoch: 19/20... Training Step: 8403... Training loss: 0.8254... 0.1199 sec/batch\n", "Epoch: 19/20... Training Step: 8404... Training loss: 0.9462... 0.1189 sec/batch\n", "Epoch: 19/20... Training Step: 8405... Training loss: 0.8446... 0.1151 sec/batch\n", "Epoch: 19/20... Training Step: 8406... Training loss: 0.7961... 0.1223 sec/batch\n", "Epoch: 19/20... Training Step: 8407... Training loss: 0.7677... 0.1262 sec/batch\n", "Epoch: 19/20... Training Step: 8408... Training loss: 0.7660... 0.1204 sec/batch\n", "Epoch: 19/20... Training Step: 8409... Training loss: 1.0374... 0.1166 sec/batch\n", "Epoch: 19/20... Training Step: 8410... Training loss: 0.9679... 0.1210 sec/batch\n", "Epoch: 19/20... Training Step: 8411... Training loss: 0.7982... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8412... Training loss: 0.8126... 0.1145 sec/batch\n", "Epoch: 19/20... Training Step: 8413... Training loss: 0.7423... 0.1170 sec/batch\n", "Epoch: 19/20... Training Step: 8414... Training loss: 0.9773... 0.1184 sec/batch\n", "Epoch: 19/20... Training Step: 8415... Training loss: 0.7734... 0.1165 sec/batch\n", "Epoch: 19/20... Training Step: 8416... Training loss: 0.9459... 0.1204 sec/batch\n", "Epoch: 19/20... Training Step: 8417... Training loss: 0.9017... 0.1189 sec/batch\n", "Epoch: 19/20... Training Step: 8418... Training loss: 0.8908... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8419... Training loss: 0.9738... 0.1134 sec/batch\n", "Epoch: 19/20... Training Step: 8420... Training loss: 0.9495... 0.1178 sec/batch\n", "Epoch: 19/20... Training Step: 8421... Training loss: 0.8455... 0.1154 sec/batch\n", "Epoch: 19/20... Training Step: 8422... Training loss: 0.9506... 0.1233 sec/batch\n", "Epoch: 19/20... Training Step: 8423... Training loss: 0.9734... 0.1200 sec/batch\n", "Epoch: 19/20... Training Step: 8424... Training loss: 0.7675... 0.1202 sec/batch\n", "Epoch: 19/20... Training Step: 8425... 
Training loss: 0.7218... 0.1160 sec/batch\n", "Epoch: 19/20... Training Step: 8426... Training loss: 0.7669... 0.1198 sec/batch\n", "Epoch: 19/20... Training Step: 8427... Training loss: 1.0333... 0.1179 sec/batch\n", "Epoch: 19/20... Training Step: 8428... Training loss: 0.7807... 0.1222 sec/batch\n", "Epoch: 19/20... Training Step: 8429... Training loss: 0.8915... 0.1133 sec/batch\n", "Epoch: 19/20... Training Step: 8430... Training loss: 0.8830... 0.1193 sec/batch\n", "Epoch: 19/20... Training Step: 8431... Training loss: 0.9661... 0.1157 sec/batch\n", "Epoch: 19/20... Training Step: 8432... Training loss: 0.8350... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8433... Training loss: 0.9652... 0.1219 sec/batch\n", "Epoch: 19/20... Training Step: 8434... Training loss: 0.8430... 0.1146 sec/batch\n", "Epoch: 19/20... Training Step: 8435... Training loss: 0.7620... 0.1208 sec/batch\n", "Epoch: 19/20... Training Step: 8436... Training loss: 0.9202... 0.1159 sec/batch\n", "Epoch: 19/20... Training Step: 8437... Training loss: 0.8618... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8438... Training loss: 1.0756... 0.1145 sec/batch\n", "Epoch: 19/20... Training Step: 8439... Training loss: 0.8286... 0.1186 sec/batch\n", "Epoch: 19/20... Training Step: 8440... Training loss: 1.0239... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8441... Training loss: 0.9587... 0.1199 sec/batch\n", "Epoch: 19/20... Training Step: 8442... Training loss: 0.8869... 0.1184 sec/batch\n", "Epoch: 19/20... Training Step: 8443... Training loss: 1.1216... 0.1185 sec/batch\n", "Epoch: 19/20... Training Step: 8444... Training loss: 0.9900... 0.1176 sec/batch\n", "Epoch: 19/20... Training Step: 8445... Training loss: 0.7984... 0.1139 sec/batch\n", "Epoch: 19/20... Training Step: 8446... Training loss: 1.0059... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8447... Training loss: 0.8419... 0.1172 sec/batch\n", "Epoch: 19/20... Training Step: 8448... 
Training loss: 0.8825... 0.1179 sec/batch\n", "Epoch: 19/20... Training Step: 8449... Training loss: 1.0125... 0.1224 sec/batch\n", "Epoch: 19/20... Training Step: 8450... Training loss: 0.9263... 0.1176 sec/batch\n", "Epoch: 19/20... Training Step: 8451... Training loss: 0.9444... 0.1203 sec/batch\n", "Epoch: 19/20... Training Step: 8452... Training loss: 0.8991... 0.1198 sec/batch\n", "Epoch: 19/20... Training Step: 8453... Training loss: 0.8962... 0.1179 sec/batch\n", "Epoch: 19/20... Training Step: 8454... Training loss: 0.9280... 0.1147 sec/batch\n", "Epoch: 19/20... Training Step: 8455... Training loss: 1.0750... 0.1187 sec/batch\n", "Epoch: 19/20... Training Step: 8456... Training loss: 0.9232... 0.1197 sec/batch\n", "Epoch: 19/20... Training Step: 8457... Training loss: 0.9161... 0.1170 sec/batch\n", "Epoch: 19/20... Training Step: 8458... Training loss: 0.9629... 0.1212 sec/batch\n", "Epoch: 19/20... Training Step: 8459... Training loss: 0.7476... 0.1118 sec/batch\n", "Epoch: 19/20... Training Step: 8460... Training loss: 1.0200... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8461... Training loss: 0.8787... 0.1152 sec/batch\n", "Epoch: 19/20... Training Step: 8462... Training loss: 0.8643... 0.1204 sec/batch\n", "Epoch: 19/20... Training Step: 8463... Training loss: 1.0533... 0.1215 sec/batch\n", "Epoch: 19/20... Training Step: 8464... Training loss: 0.8282... 0.1159 sec/batch\n", "Epoch: 19/20... Training Step: 8465... Training loss: 1.0126... 0.1193 sec/batch\n", "Epoch: 19/20... Training Step: 8466... Training loss: 1.0185... 0.1131 sec/batch\n", "Epoch: 19/20... Training Step: 8467... Training loss: 0.9487... 0.1217 sec/batch\n", "Epoch: 19/20... Training Step: 8468... Training loss: 0.8747... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8469... Training loss: 0.9117... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8470... Training loss: 0.8894... 0.1213 sec/batch\n", "Epoch: 19/20... Training Step: 8471... 
Training loss: 0.8600... 0.1166 sec/batch\n", "Epoch: 19/20... Training Step: 8472... Training loss: 0.7770... 0.1186 sec/batch\n", "Epoch: 19/20... Training Step: 8473... Training loss: 0.8908... 0.1200 sec/batch\n", "Epoch: 19/20... Training Step: 8474... Training loss: 0.9971... 0.1163 sec/batch\n", "Epoch: 19/20... Training Step: 8475... Training loss: 1.0990... 0.1155 sec/batch\n", "Epoch: 19/20... Training Step: 8476... Training loss: 0.8303... 0.1198 sec/batch\n", "Epoch: 19/20... Training Step: 8477... Training loss: 0.9536... 0.1181 sec/batch\n", "Epoch: 19/20... Training Step: 8478... Training loss: 0.7994... 0.1185 sec/batch\n", "Epoch: 19/20... Training Step: 8479... Training loss: 0.8484... 0.1181 sec/batch\n", "Epoch: 19/20... Training Step: 8480... Training loss: 0.9479... 0.1187 sec/batch\n", "Epoch: 19/20... Training Step: 8481... Training loss: 0.7923... 0.1172 sec/batch\n", "Epoch: 19/20... Training Step: 8482... Training loss: 0.9580... 0.1206 sec/batch\n", "Epoch: 19/20... Training Step: 8483... Training loss: 0.8753... 0.1155 sec/batch\n", "Epoch: 19/20... Training Step: 8484... Training loss: 0.9433... 0.1182 sec/batch\n", "Epoch: 19/20... Training Step: 8485... Training loss: 0.8936... 0.1173 sec/batch\n", "Epoch: 19/20... Training Step: 8486... Training loss: 1.0282... 0.1162 sec/batch\n", "Epoch: 19/20... Training Step: 8487... Training loss: 0.9032... 0.1119 sec/batch\n", "Epoch: 19/20... Training Step: 8488... Training loss: 0.8713... 0.1213 sec/batch\n", "Epoch: 19/20... Training Step: 8489... Training loss: 0.8299... 0.1222 sec/batch\n", "Epoch: 19/20... Training Step: 8490... Training loss: 1.0685... 0.1249 sec/batch\n", "Epoch: 19/20... Training Step: 8491... Training loss: 0.9670... 0.1142 sec/batch\n", "Epoch: 19/20... Training Step: 8492... Training loss: 0.7985... 0.1147 sec/batch\n", "Epoch: 19/20... Training Step: 8493... Training loss: 0.8016... 0.1153 sec/batch\n", "Epoch: 19/20... Training Step: 8494... 
Training loss: 0.8926... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8495... Training loss: 0.8007... 0.1189 sec/batch\n", "Epoch: 19/20... Training Step: 8496... Training loss: 0.9549... 0.1201 sec/batch\n", "Epoch: 19/20... Training Step: 8497... Training loss: 1.0459... 0.1232 sec/batch\n", "Epoch: 19/20... Training Step: 8498... Training loss: 0.8770... 0.1228 sec/batch\n", "Epoch: 19/20... Training Step: 8499... Training loss: 0.9543... 0.1149 sec/batch\n", "Epoch: 19/20... Training Step: 8500... Training loss: 0.7901... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8501... Training loss: 0.7204... 0.1175 sec/batch\n", "Epoch: 19/20... Training Step: 8502... Training loss: 0.9400... 0.1163 sec/batch\n", "Epoch: 19/20... Training Step: 8503... Training loss: 0.7681... 0.1215 sec/batch\n", "Epoch: 19/20... Training Step: 8504... Training loss: 0.8676... 0.1165 sec/batch\n", "Epoch: 19/20... Training Step: 8505... Training loss: 1.0977... 0.1168 sec/batch\n", "Epoch: 19/20... Training Step: 8506... Training loss: 0.8863... 0.1170 sec/batch\n", "Epoch: 19/20... Training Step: 8507... Training loss: 0.8087... 0.1157 sec/batch\n", "Epoch: 19/20... Training Step: 8508... Training loss: 0.9008... 0.1166 sec/batch\n", "Epoch: 19/20... Training Step: 8509... Training loss: 0.8643... 0.1184 sec/batch\n", "Epoch: 19/20... Training Step: 8510... Training loss: 1.0224... 0.1149 sec/batch\n", "Epoch: 19/20... Training Step: 8511... Training loss: 0.8687... 0.1121 sec/batch\n", "Epoch: 19/20... Training Step: 8512... Training loss: 0.9445... 0.1146 sec/batch\n", "Epoch: 19/20... Training Step: 8513... Training loss: 0.9442... 0.1182 sec/batch\n", "Epoch: 19/20... Training Step: 8514... Training loss: 0.8869... 0.1192 sec/batch\n", "Epoch: 19/20... Training Step: 8515... Training loss: 1.1530... 0.1189 sec/batch\n", "Epoch: 19/20... Training Step: 8516... Training loss: 0.8034... 0.1155 sec/batch\n", "Epoch: 19/20... Training Step: 8517... 
Training loss: 0.8737... 0.1161 sec/batch\n", "Epoch: 19/20... Training Step: 8518... Training loss: 0.7627... 0.1159 sec/batch\n", "Epoch: 19/20... Training Step: 8519... Training loss: 0.7290... 0.1218 sec/batch\n", "Epoch: 19/20... Training Step: 8520... Training loss: 0.9330... 0.1160 sec/batch\n", "Epoch: 19/20... Training Step: 8521... Training loss: 0.8120... 0.1152 sec/batch\n", "Epoch: 19/20... Training Step: 8522... Training loss: 0.8742... 0.1194 sec/batch\n", "Epoch: 19/20... Training Step: 8523... Training loss: 1.0302... 0.1207 sec/batch\n", "Epoch: 19/20... Training Step: 8524... Training loss: 0.9148... 0.1150 sec/batch\n", "Epoch: 19/20... Training Step: 8525... Training loss: 0.8373... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8526... Training loss: 0.7907... 0.1207 sec/batch\n", "Epoch: 19/20... Training Step: 8527... Training loss: 0.9564... 0.1143 sec/batch\n", "Epoch: 19/20... Training Step: 8528... Training loss: 0.8016... 0.1201 sec/batch\n", "Epoch: 19/20... Training Step: 8529... Training loss: 0.8069... 0.1173 sec/batch\n", "Epoch: 19/20... Training Step: 8530... Training loss: 1.0213... 0.1168 sec/batch\n", "Epoch: 19/20... Training Step: 8531... Training loss: 0.7455... 0.1182 sec/batch\n", "Epoch: 19/20... Training Step: 8532... Training loss: 0.9283... 0.1161 sec/batch\n", "Epoch: 19/20... Training Step: 8533... Training loss: 0.6879... 0.1167 sec/batch\n", "Epoch: 19/20... Training Step: 8534... Training loss: 0.9287... 0.1208 sec/batch\n", "Epoch: 19/20... Training Step: 8535... Training loss: 0.9933... 0.1138 sec/batch\n", "Epoch: 19/20... Training Step: 8536... Training loss: 0.7984... 0.1146 sec/batch\n", "Epoch: 19/20... Training Step: 8537... Training loss: 0.9559... 0.1219 sec/batch\n", "Epoch: 19/20... Training Step: 8538... Training loss: 0.9550... 0.1232 sec/batch\n", "Epoch: 19/20... Training Step: 8539... Training loss: 0.9578... 0.1142 sec/batch\n", "Epoch: 19/20... Training Step: 8540... 
Training loss: 0.7711... 0.1207 sec/batch\n", "Epoch: 19/20... Training Step: 8541... Training loss: 0.9588... 0.1180 sec/batch\n", "Epoch: 19/20... Training Step: 8542... Training loss: 0.8390... 0.1141 sec/batch\n", "Epoch: 19/20... Training Step: 8543... Training loss: 0.7988... 0.1225 sec/batch\n", "Epoch: 19/20... Training Step: 8544... Training loss: 1.0284... 0.1288 sec/batch\n", "Epoch: 19/20... Training Step: 8545... Training loss: 0.8730... 0.1292 sec/batch\n", "Epoch: 19/20... Training Step: 8546... Training loss: 0.9307... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8547... Training loss: 0.8652... 0.1191 sec/batch\n", "Epoch: 19/20... Training Step: 8548... Training loss: 0.9512... 0.1167 sec/batch\n", "Epoch: 19/20... Training Step: 8549... Training loss: 0.8991... 0.1168 sec/batch\n", "Epoch: 19/20... Training Step: 8550... Training loss: 1.0117... 0.1154 sec/batch\n", "Epoch: 19/20... Training Step: 8551... Training loss: 0.8501... 0.1162 sec/batch\n", "Epoch: 19/20... Training Step: 8552... Training loss: 0.8786... 0.1168 sec/batch\n", "Epoch: 19/20... Training Step: 8553... Training loss: 1.0003... 0.1189 sec/batch\n", "Epoch: 19/20... Training Step: 8554... Training loss: 0.9783... 0.1186 sec/batch\n", "Epoch: 19/20... Training Step: 8555... Training loss: 0.9469... 0.1162 sec/batch\n", "Epoch: 19/20... Training Step: 8556... Training loss: 1.0828... 0.1193 sec/batch\n", "Epoch: 19/20... Training Step: 8557... Training loss: 0.6757... 0.1149 sec/batch\n", "Epoch: 19/20... Training Step: 8558... Training loss: 0.8746... 0.1123 sec/batch\n", "Epoch: 19/20... Training Step: 8559... Training loss: 0.8508... 0.1191 sec/batch\n", "Epoch: 19/20... Training Step: 8560... Training loss: 0.9709... 0.1139 sec/batch\n", "Epoch: 19/20... Training Step: 8561... Training loss: 0.8308... 0.1182 sec/batch\n", "Epoch: 19/20... Training Step: 8562... Training loss: 0.6846... 0.1168 sec/batch\n", "Epoch: 19/20... Training Step: 8563... 
Training loss: 0.9316... 0.1197 sec/batch\n", "Epoch: 19/20... Training Step: 8564... Training loss: 0.9513... 0.1175 sec/batch\n", "Epoch: 19/20... Training Step: 8565... Training loss: 1.0825... 0.1231 sec/batch\n", "Epoch: 19/20... Training Step: 8566... Training loss: 0.8784... 0.1151 sec/batch\n", "Epoch: 19/20... Training Step: 8567... Training loss: 0.9835... 0.1220 sec/batch\n", "Epoch: 19/20... Training Step: 8568... Training loss: 0.8494... 0.1170 sec/batch\n", "Epoch: 19/20... Training Step: 8569... Training loss: 0.8551... 0.1178 sec/batch\n", "Epoch: 19/20... Training Step: 8570... Training loss: 1.0499... 0.1148 sec/batch\n", "Epoch: 19/20... Training Step: 8571... Training loss: 0.9505... 0.1172 sec/batch\n", "Epoch: 19/20... Training Step: 8572... Training loss: 0.8673... 0.1144 sec/batch\n", "Epoch: 19/20... Training Step: 8573... Training loss: 0.8359... 0.1179 sec/batch\n", "Epoch: 19/20... Training Step: 8574... Training loss: 1.2105... 0.1184 sec/batch\n", "Epoch: 19/20... Training Step: 8575... Training loss: 1.0522... 0.1144 sec/batch\n", "Epoch: 19/20... Training Step: 8576... Training loss: 1.0579... 0.1202 sec/batch\n", "Epoch: 19/20... Training Step: 8577... Training loss: 1.0010... 0.1121 sec/batch\n", "Epoch: 19/20... Training Step: 8578... Training loss: 1.0513... 0.1114 sec/batch\n", "Epoch: 19/20... Training Step: 8579... Training loss: 1.0687... 0.1185 sec/batch\n", "Epoch: 19/20... Training Step: 8580... Training loss: 0.9669... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8581... Training loss: 1.1172... 0.1152 sec/batch\n", "Epoch: 19/20... Training Step: 8582... Training loss: 0.8063... 0.1178 sec/batch\n", "Epoch: 19/20... Training Step: 8583... Training loss: 1.0568... 0.1143 sec/batch\n", "Epoch: 19/20... Training Step: 8584... Training loss: 0.9305... 0.1190 sec/batch\n", "Epoch: 19/20... Training Step: 8585... Training loss: 1.1201... 0.1191 sec/batch\n", "Epoch: 19/20... Training Step: 8586... 
Training loss: 0.9428... 0.1192 sec/batch\n", "Epoch: 19/20... Training Step: 8587... Training loss: 1.0178... 0.1187 sec/batch\n", "Epoch: 19/20... Training Step: 8588... Training loss: 0.9685... 0.1165 sec/batch\n", "Epoch: 19/20... Training Step: 8589... Training loss: 0.9347... 0.1205 sec/batch\n", "Epoch: 19/20... Training Step: 8590... Training loss: 0.7622... 0.1183 sec/batch\n", "Epoch: 19/20... Training Step: 8591... Training loss: 1.0445... 0.1191 sec/batch\n", "Epoch: 19/20... Training Step: 8592... Training loss: 1.0142... 0.1166 sec/batch\n", "Epoch: 19/20... Training Step: 8593... Training loss: 0.9628... 0.1200 sec/batch\n", "Epoch: 19/20... Training Step: 8594... Training loss: 0.9034... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8595... Training loss: 1.1135... 0.1158 sec/batch\n", "Epoch: 19/20... Training Step: 8596... Training loss: 1.0147... 0.1189 sec/batch\n", "Epoch: 19/20... Training Step: 8597... Training loss: 1.0397... 0.1145 sec/batch\n", "Epoch: 19/20... Training Step: 8598... Training loss: 0.9975... 0.1202 sec/batch\n", "Epoch: 19/20... Training Step: 8599... Training loss: 0.9117... 0.1158 sec/batch\n", "Epoch: 19/20... Training Step: 8600... Training loss: 1.1003... 0.1194 sec/batch\n", "Epoch: 19/20... Training Step: 8601... Training loss: 0.9365... 0.1186 sec/batch\n", "Epoch: 19/20... Training Step: 8602... Training loss: 0.8750... 0.1159 sec/batch\n", "Epoch: 19/20... Training Step: 8603... Training loss: 0.9296... 0.1169 sec/batch\n", "Epoch: 19/20... Training Step: 8604... Training loss: 0.9686... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8605... Training loss: 0.9746... 0.1163 sec/batch\n", "Epoch: 19/20... Training Step: 8606... Training loss: 0.8738... 0.1193 sec/batch\n", "Epoch: 19/20... Training Step: 8607... Training loss: 0.9815... 0.1199 sec/batch\n", "Epoch: 19/20... Training Step: 8608... Training loss: 0.9565... 0.1142 sec/batch\n", "Epoch: 19/20... Training Step: 8609... 
Training loss: 1.1181... 0.1176 sec/batch\n", "Epoch: 19/20... Training Step: 8610... Training loss: 0.8457... 0.1153 sec/batch\n", "Epoch: 19/20... Training Step: 8611... Training loss: 0.7799... 0.1192 sec/batch\n", "Epoch: 19/20... Training Step: 8612... Training loss: 0.9150... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8613... Training loss: 0.9450... 0.1153 sec/batch\n", "Epoch: 19/20... Training Step: 8614... Training loss: 1.0222... 0.1160 sec/batch\n", "Epoch: 19/20... Training Step: 8615... Training loss: 0.9535... 0.1161 sec/batch\n", "Epoch: 19/20... Training Step: 8616... Training loss: 0.9967... 0.1193 sec/batch\n", "Epoch: 19/20... Training Step: 8617... Training loss: 1.0886... 0.1188 sec/batch\n", "Epoch: 19/20... Training Step: 8618... Training loss: 0.8254... 0.1149 sec/batch\n", "Epoch: 19/20... Training Step: 8619... Training loss: 0.9543... 0.1164 sec/batch\n", "Epoch: 19/20... Training Step: 8620... Training loss: 1.0099... 0.1165 sec/batch\n", "Epoch: 19/20... Training Step: 8621... Training loss: 0.9523... 0.1165 sec/batch\n", "Epoch: 19/20... Training Step: 8622... Training loss: 0.9749... 0.1164 sec/batch\n", "Epoch: 19/20... Training Step: 8623... Training loss: 0.8621... 0.1192 sec/batch\n", "Epoch: 19/20... Training Step: 8624... Training loss: 1.0358... 0.1183 sec/batch\n", "Epoch: 19/20... Training Step: 8625... Training loss: 1.0173... 0.1153 sec/batch\n", "Epoch: 19/20... Training Step: 8626... Training loss: 0.9774... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8627... Training loss: 1.0074... 0.1215 sec/batch\n", "Epoch: 19/20... Training Step: 8628... Training loss: 0.8813... 0.1180 sec/batch\n", "Epoch: 19/20... Training Step: 8629... Training loss: 0.9027... 0.1164 sec/batch\n", "Epoch: 19/20... Training Step: 8630... Training loss: 1.0435... 0.1199 sec/batch\n", "Epoch: 19/20... Training Step: 8631... Training loss: 1.0327... 0.1221 sec/batch\n", "Epoch: 19/20... Training Step: 8632... 
Training loss: 1.0001... 0.1297 sec/batch\n", "Epoch: 19/20... Training Step: 8633... Training loss: 0.9351... 0.1373 sec/batch\n", "Epoch: 19/20... Training Step: 8634... Training loss: 0.9994... 0.1347 sec/batch\n", "Epoch: 19/20... Training Step: 8635... Training loss: 0.9588... 0.1331 sec/batch\n", "Epoch: 19/20... Training Step: 8636... Training loss: 1.0084... 0.1430 sec/batch\n", "Epoch: 19/20... Training Step: 8637... Training loss: 0.9255... 0.1279 sec/batch\n", "Epoch: 19/20... Training Step: 8638... Training loss: 0.9660... 0.1264 sec/batch\n", "Epoch: 19/20... Training Step: 8639... Training loss: 0.9446... 0.1227 sec/batch\n", "Epoch: 19/20... Training Step: 8640... Training loss: 0.9855... 0.1240 sec/batch\n", "Epoch: 19/20... Training Step: 8641... Training loss: 0.9979... 0.1274 sec/batch\n", "Epoch: 19/20... Training Step: 8642... Training loss: 0.8923... 0.1425 sec/batch\n", "Epoch: 19/20... Training Step: 8643... Training loss: 0.9811... 0.1348 sec/batch\n", "Epoch: 19/20... Training Step: 8644... Training loss: 0.8238... 0.1243 sec/batch\n", "Epoch: 19/20... Training Step: 8645... Training loss: 0.8397... 0.1239 sec/batch\n", "Epoch: 19/20... Training Step: 8646... Training loss: 0.8761... 0.1522 sec/batch\n", "Epoch: 19/20... Training Step: 8647... Training loss: 1.0037... 0.1488 sec/batch\n", "Epoch: 19/20... Training Step: 8648... Training loss: 1.2037... 0.1475 sec/batch\n", "Epoch: 19/20... Training Step: 8649... Training loss: 0.8352... 0.1397 sec/batch\n", "Epoch: 19/20... Training Step: 8650... Training loss: 0.8239... 0.1374 sec/batch\n", "Epoch: 19/20... Training Step: 8651... Training loss: 0.8554... 0.1343 sec/batch\n", "Epoch: 19/20... Training Step: 8652... Training loss: 1.0027... 0.1313 sec/batch\n", "Epoch: 19/20... Training Step: 8653... Training loss: 1.0996... 0.1231 sec/batch\n", "Epoch: 19/20... Training Step: 8654... Training loss: 0.9638... 0.1236 sec/batch\n", "Epoch: 19/20... Training Step: 8655... 
Training loss: 0.8117... 0.1274 sec/batch\n", "Epoch: 19/20... Training Step: 8656... Training loss: 1.2058... 0.1277 sec/batch\n", "Epoch: 19/20... Training Step: 8657... Training loss: 0.8405... 0.1434 sec/batch\n", "Epoch: 19/20... Training Step: 8658... Training loss: 0.9427... 0.1224 sec/batch\n", "Epoch: 19/20... Training Step: 8659... Training loss: 1.0139... 0.1215 sec/batch\n", "Epoch: 19/20... Training Step: 8660... Training loss: 1.0978... 0.1315 sec/batch\n", "Epoch: 19/20... Training Step: 8661... Training loss: 1.1291... 0.1297 sec/batch\n", "Epoch: 19/20... Training Step: 8662... Training loss: 1.0864... 0.1273 sec/batch\n", "Epoch: 19/20... Training Step: 8663... Training loss: 0.8977... 0.1274 sec/batch\n", "Epoch: 19/20... Training Step: 8664... Training loss: 0.7591... 0.1293 sec/batch\n", "Epoch: 19/20... Training Step: 8665... Training loss: 0.8880... 0.1242 sec/batch\n", "Epoch: 19/20... Training Step: 8666... Training loss: 0.7792... 0.1245 sec/batch\n", "Epoch: 19/20... Training Step: 8667... Training loss: 0.8290... 0.1212 sec/batch\n", "Epoch: 19/20... Training Step: 8668... Training loss: 0.7427... 0.1218 sec/batch\n", "Epoch: 19/20... Training Step: 8669... Training loss: 0.8176... 0.1274 sec/batch\n", "Epoch: 19/20... Training Step: 8670... Training loss: 0.8131... 0.1260 sec/batch\n", "Epoch: 19/20... Training Step: 8671... Training loss: 0.7967... 0.1211 sec/batch\n", "Epoch: 19/20... Training Step: 8672... Training loss: 0.8071... 0.1161 sec/batch\n", "Epoch: 19/20... Training Step: 8673... Training loss: 0.7006... 0.1173 sec/batch\n", "Epoch: 19/20... Training Step: 8674... Training loss: 1.0849... 0.1196 sec/batch\n", "Epoch: 19/20... Training Step: 8675... Training loss: 0.8566... 0.1170 sec/batch\n", "Epoch: 19/20... Training Step: 8676... Training loss: 0.9283... 0.1175 sec/batch\n", "Epoch: 19/20... Training Step: 8677... Training loss: 0.8146... 0.1168 sec/batch\n", "Epoch: 19/20... Training Step: 8678... 
Training loss: 0.7066... 0.1147 sec/batch\n", "Epoch: 19/20... Training Step: 8679... Training loss: 0.8988... 0.1200 sec/batch\n", "Epoch: 19/20... Training Step: 8680... Training loss: 0.9267... 0.1192 sec/batch\n", "Epoch: 19/20... Training Step: 8681... Training loss: 0.9626... 0.1170 sec/batch\n", "Epoch: 19/20... Training Step: 8682... Training loss: 0.8964... 0.1194 sec/batch\n", "Epoch: 19/20... Training Step: 8683... Training loss: 0.9740... 0.1182 sec/batch\n", "Epoch: 19/20... Training Step: 8684... Training loss: 0.9962... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8685... Training loss: 0.8942... 0.1203 sec/batch\n", "Epoch: 19/20... Training Step: 8686... Training loss: 0.9387... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8687... Training loss: 0.8946... 0.1194 sec/batch\n", "Epoch: 19/20... Training Step: 8688... Training loss: 0.9599... 0.1200 sec/batch\n", "Epoch: 19/20... Training Step: 8689... Training loss: 0.6985... 0.1163 sec/batch\n", "Epoch: 19/20... Training Step: 8690... Training loss: 0.7347... 0.1163 sec/batch\n", "Epoch: 19/20... Training Step: 8691... Training loss: 0.9406... 0.1179 sec/batch\n", "Epoch: 19/20... Training Step: 8692... Training loss: 0.8813... 0.1219 sec/batch\n", "Epoch: 19/20... Training Step: 8693... Training loss: 0.8424... 0.1159 sec/batch\n", "Epoch: 19/20... Training Step: 8694... Training loss: 0.9697... 0.1283 sec/batch\n", "Epoch: 19/20... Training Step: 8695... Training loss: 0.7380... 0.1292 sec/batch\n", "Epoch: 19/20... Training Step: 8696... Training loss: 0.9233... 0.1218 sec/batch\n", "Epoch: 19/20... Training Step: 8697... Training loss: 0.8900... 0.1233 sec/batch\n", "Epoch: 19/20... Training Step: 8698... Training loss: 0.9082... 0.1179 sec/batch\n", "Epoch: 19/20... Training Step: 8699... Training loss: 0.8763... 0.1228 sec/batch\n", "Epoch: 19/20... Training Step: 8700... Training loss: 0.9928... 0.1186 sec/batch\n", "Epoch: 19/20... Training Step: 8701... 
Training loss: 0.8402... 0.1180 sec/batch\n", "Epoch: 19/20... Training Step: 8702... Training loss: 0.9897... 0.1172 sec/batch\n", "Epoch: 19/20... Training Step: 8703... Training loss: 0.9604... 0.1224 sec/batch\n", "Epoch: 19/20... Training Step: 8704... Training loss: 0.8964... 0.1131 sec/batch\n", "Epoch: 19/20... Training Step: 8705... Training loss: 0.8219... 0.1166 sec/batch\n", "Epoch: 19/20... Training Step: 8706... Training loss: 0.7084... 0.1142 sec/batch\n", "Epoch: 19/20... Training Step: 8707... Training loss: 0.8767... 0.1204 sec/batch\n", "Epoch: 19/20... Training Step: 8708... Training loss: 0.8752... 0.1146 sec/batch\n", "Epoch: 19/20... Training Step: 8709... Training loss: 0.8093... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8710... Training loss: 0.8577... 0.1158 sec/batch\n", "Epoch: 19/20... Training Step: 8711... Training loss: 0.9847... 0.1165 sec/batch\n", "Epoch: 19/20... Training Step: 8712... Training loss: 0.7757... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8713... Training loss: 0.9510... 0.1175 sec/batch\n", "Epoch: 19/20... Training Step: 8714... Training loss: 0.9913... 0.1164 sec/batch\n", "Epoch: 19/20... Training Step: 8715... Training loss: 0.7603... 0.1202 sec/batch\n", "Epoch: 19/20... Training Step: 8716... Training loss: 0.8238... 0.1169 sec/batch\n", "Epoch: 19/20... Training Step: 8717... Training loss: 0.9700... 0.1213 sec/batch\n", "Epoch: 19/20... Training Step: 8718... Training loss: 0.9789... 0.1178 sec/batch\n", "Epoch: 19/20... Training Step: 8719... Training loss: 0.9734... 0.1237 sec/batch\n", "Epoch: 19/20... Training Step: 8720... Training loss: 1.0859... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8721... Training loss: 1.0095... 0.1153 sec/batch\n", "Epoch: 19/20... Training Step: 8722... Training loss: 1.0099... 0.1182 sec/batch\n", "Epoch: 19/20... Training Step: 8723... Training loss: 0.8584... 0.1184 sec/batch\n", "Epoch: 19/20... Training Step: 8724... 
Training loss: 1.1378... 0.1194 sec/batch\n", "Epoch: 19/20... Training Step: 8725... Training loss: 0.8288... 0.1172 sec/batch\n", "Epoch: 19/20... Training Step: 8726... Training loss: 1.0795... 0.1178 sec/batch\n", "Epoch: 19/20... Training Step: 8727... Training loss: 0.7935... 0.1206 sec/batch\n", "Epoch: 19/20... Training Step: 8728... Training loss: 1.0034... 0.1162 sec/batch\n", "Epoch: 19/20... Training Step: 8729... Training loss: 1.0003... 0.1158 sec/batch\n", "Epoch: 19/20... Training Step: 8730... Training loss: 0.8986... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8731... Training loss: 0.8314... 0.1149 sec/batch\n", "Epoch: 19/20... Training Step: 8732... Training loss: 1.0329... 0.1178 sec/batch\n", "Epoch: 19/20... Training Step: 8733... Training loss: 0.8491... 0.1193 sec/batch\n", "Epoch: 19/20... Training Step: 8734... Training loss: 0.9716... 0.1177 sec/batch\n", "Epoch: 19/20... Training Step: 8735... Training loss: 0.8704... 0.1201 sec/batch\n", "Epoch: 19/20... Training Step: 8736... Training loss: 0.9014... 0.1194 sec/batch\n", "Epoch: 19/20... Training Step: 8737... Training loss: 0.7960... 0.1207 sec/batch\n", "Epoch: 19/20... Training Step: 8738... Training loss: 0.8435... 0.1170 sec/batch\n", "Epoch: 19/20... Training Step: 8739... Training loss: 1.0479... 0.1174 sec/batch\n", "Epoch: 19/20... Training Step: 8740... Training loss: 0.8862... 0.1185 sec/batch\n", "Epoch: 19/20... Training Step: 8741... Training loss: 0.7509... 0.1162 sec/batch\n", "Epoch: 19/20... Training Step: 8742... Training loss: 0.8779... 0.1159 sec/batch\n", "Epoch: 19/20... Training Step: 8743... Training loss: 0.8517... 0.1161 sec/batch\n", "Epoch: 19/20... Training Step: 8744... Training loss: 0.9834... 0.1145 sec/batch\n", "Epoch: 19/20... Training Step: 8745... Training loss: 0.9837... 0.1199 sec/batch\n", "Epoch: 19/20... Training Step: 8746... Training loss: 1.0526... 0.1218 sec/batch\n", "Epoch: 19/20... Training Step: 8747... 
Training loss: 0.7602... 0.1190 sec/batch\n", "Epoch: 19/20... Training Step: 8748... Training loss: 1.0128... 0.1181 sec/batch\n", "Epoch: 19/20... Training Step: 8749... Training loss: 0.9509... 0.1185 sec/batch\n", "Epoch: 19/20... Training Step: 8750... Training loss: 0.8920... 0.1210 sec/batch\n", "Epoch: 19/20... Training Step: 8751... Training loss: 1.0427... 0.1161 sec/batch\n", "Epoch: 19/20... Training Step: 8752... Training loss: 0.9562... 0.1207 sec/batch\n", "Epoch: 19/20... Training Step: 8753... Training loss: 0.8233... 0.1194 sec/batch\n", "Epoch: 19/20... Training Step: 8754... Training loss: 0.9544... 0.1204 sec/batch\n", "Epoch: 19/20... Training Step: 8755... Training loss: 0.7760... 0.1171 sec/batch\n", "Epoch: 19/20... Training Step: 8756... Training loss: 0.8668... 0.1190 sec/batch\n", "Epoch: 19/20... Training Step: 8757... Training loss: 0.9271... 0.1156 sec/batch\n", "Epoch: 19/20... Training Step: 8758... Training loss: 1.0837... 0.1217 sec/batch\n", "Epoch: 19/20... Training Step: 8759... Training loss: 0.7970... 0.1213 sec/batch\n", "Epoch: 19/20... Training Step: 8760... Training loss: 1.2281... 0.1144 sec/batch\n", "Epoch: 19/20... Training Step: 8761... Training loss: 0.9041... 0.1228 sec/batch\n", "Epoch: 19/20... Training Step: 8762... Training loss: 0.8894... 0.1187 sec/batch\n", "Epoch: 19/20... Training Step: 8763... Training loss: 0.8122... 0.1200 sec/batch\n", "Epoch: 19/20... Training Step: 8764... Training loss: 1.0320... 0.1200 sec/batch\n", "Epoch: 19/20... Training Step: 8765... Training loss: 0.9956... 0.1184 sec/batch\n", "Epoch: 19/20... Training Step: 8766... Training loss: 0.8656... 0.1149 sec/batch\n", "Epoch: 19/20... Training Step: 8767... Training loss: 1.0168... 0.1220 sec/batch\n", "Epoch: 19/20... Training Step: 8768... Training loss: 0.9922... 0.1212 sec/batch\n", "Epoch: 19/20... Training Step: 8769... Training loss: 0.8688... 0.1184 sec/batch\n", "Epoch: 19/20... Training Step: 8770... 
Training loss: 0.7241... 0.1239 sec/batch\n", "Epoch: 19/20... Training Step: 8771... Training loss: 0.7804... 0.1130 sec/batch\n", "Epoch: 19/20... Training Step: 8772... Training loss: 0.8463... 0.1195 sec/batch\n", "Epoch: 19/20... Training Step: 8773... Training loss: 1.0351... 0.1163 sec/batch\n", "Epoch: 19/20... Training Step: 8774... Training loss: 0.8016... 0.1175 sec/batch\n", "Epoch: 19/20... Training Step: 8775... Training loss: 0.7794... 0.1181 sec/batch\n", "Epoch: 19/20... Training Step: 8776... Training loss: 0.9351... 0.1183 sec/batch\n", "Epoch: 19/20... Training Step: 8777... Training loss: 0.9148... 0.1163 sec/batch\n", "Epoch: 19/20... Training Step: 8778... Training loss: 0.9751... 0.1190 sec/batch\n", "Epoch: 19/20... Training Step: 8779... Training loss: 0.9197... 0.1190 sec/batch\n", "Epoch: 19/20... Training Step: 8780... Training loss: 0.9926... 0.1164 sec/batch\n", "Epoch: 19/20... Training Step: 8781... Training loss: 0.7751... 0.1211 sec/batch\n", "Epoch: 19/20... Training Step: 8782... Training loss: 0.9099... 0.1200 sec/batch\n", "Epoch: 19/20... Training Step: 8783... Training loss: 1.0411... 0.1097 sec/batch\n", "Epoch: 19/20... Training Step: 8784... Training loss: 0.8835... 0.1168 sec/batch\n", "Epoch: 19/20... Training Step: 8785... Training loss: 1.0076... 0.1143 sec/batch\n", "Epoch: 19/20... Training Step: 8786... Training loss: 0.8266... 0.1189 sec/batch\n", "Epoch: 19/20... Training Step: 8787... Training loss: 0.8346... 0.1179 sec/batch\n", "Epoch: 19/20... Training Step: 8788... Training loss: 0.9870... 0.1151 sec/batch\n", "Epoch: 19/20... Training Step: 8789... Training loss: 0.9223... 0.1161 sec/batch\n", "Epoch: 19/20... Training Step: 8790... Training loss: 0.8797... 0.1157 sec/batch\n", "Epoch: 19/20... Training Step: 8791... Training loss: 1.0656... 0.1131 sec/batch\n", "Epoch: 19/20... Training Step: 8792... Training loss: 1.0199... 0.1102 sec/batch\n", "Epoch: 19/20... Training Step: 8793... 
Training loss: 0.7454... 0.1139 sec/batch\n", "Epoch: 19/20... Training Step: 8794... Training loss: 0.8662... 0.1198 sec/batch\n", "Epoch: 19/20... Training Step: 8795... Training loss: 0.9000... 0.1232 sec/batch\n", "Epoch: 19/20... Training Step: 8796... Training loss: 1.0188... 0.1210 sec/batch\n", "Epoch: 19/20... Training Step: 8797... Training loss: 0.8860... 0.1230 sec/batch\n", "Epoch: 19/20... Training Step: 8798... Training loss: 0.7374... 0.1207 sec/batch\n", "Epoch: 19/20... Training Step: 8799... Training loss: 1.0156... 0.1212 sec/batch\n", "Epoch: 19/20... Training Step: 8800... Training loss: 0.8740... 0.1180 sec/batch\n", "Epoch: 19/20... Training Step: 8801... Training loss: 0.8626... 0.1194 sec/batch\n", "Epoch: 19/20... Training Step: 8802... Training loss: 0.9347... 0.1161 sec/batch\n", "Epoch: 19/20... Training Step: 8803... Training loss: 0.8098... 0.1204 sec/batch\n", "Epoch: 19/20... Training Step: 8804... Training loss: 0.9239... 0.1198 sec/batch\n", "Epoch: 19/20... Training Step: 8805... Training loss: 0.8367... 0.1152 sec/batch\n", "Epoch: 19/20... Training Step: 8806... Training loss: 0.9629... 0.1188 sec/batch\n", "Epoch: 19/20... Training Step: 8807... Training loss: 1.0109... 0.1137 sec/batch\n", "Epoch: 19/20... Training Step: 8808... Training loss: 0.8238... 0.1196 sec/batch\n", "Epoch: 19/20... Training Step: 8809... Training loss: 0.8892... 0.1185 sec/batch\n", "Epoch: 19/20... Training Step: 8810... Training loss: 0.7549... 0.1147 sec/batch\n", "Epoch: 19/20... Training Step: 8811... Training loss: 0.8526... 0.1213 sec/batch\n", "Epoch: 19/20... Training Step: 8812... Training loss: 0.9960... 0.1158 sec/batch\n", "Epoch: 19/20... Training Step: 8813... Training loss: 0.8994... 0.1143 sec/batch\n", "Epoch: 19/20... Training Step: 8814... Training loss: 0.8955... 0.1191 sec/batch\n", "Epoch: 19/20... Training Step: 8815... Training loss: 0.9463... 0.1141 sec/batch\n", "Epoch: 19/20... Training Step: 8816... 
Training loss: 0.7819... 0.1106 sec/batch\n", "Epoch: 20/20... Training Step: 8817... Training loss: 1.0918... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 8818... Training loss: 1.0805... 0.1164 sec/batch\n", "Epoch: 20/20... Training Step: 8819... Training loss: 1.0695... 0.1190 sec/batch\n", "Epoch: 20/20... Training Step: 8820... Training loss: 0.9268... 0.1171 sec/batch\n", "Epoch: 20/20... Training Step: 8821... Training loss: 0.8752... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 8822... Training loss: 0.9147... 0.1162 sec/batch\n", "Epoch: 20/20... Training Step: 8823... Training loss: 0.9330... 0.1240 sec/batch\n", "Epoch: 20/20... Training Step: 8824... Training loss: 0.9079... 0.1152 sec/batch\n", "Epoch: 20/20... Training Step: 8825... Training loss: 0.6895... 0.1154 sec/batch\n", "Epoch: 20/20... Training Step: 8826... Training loss: 0.9698... 0.1180 sec/batch\n", "Epoch: 20/20... Training Step: 8827... Training loss: 0.8608... 0.1205 sec/batch\n", "Epoch: 20/20... Training Step: 8828... Training loss: 0.8357... 0.1154 sec/batch\n", "Epoch: 20/20... Training Step: 8829... Training loss: 1.0906... 0.1214 sec/batch\n", "Epoch: 20/20... Training Step: 8830... Training loss: 0.6900... 0.1225 sec/batch\n", "Epoch: 20/20... Training Step: 8831... Training loss: 1.0717... 0.1142 sec/batch\n", "Epoch: 20/20... Training Step: 8832... Training loss: 1.0810... 0.1204 sec/batch\n", "Epoch: 20/20... Training Step: 8833... Training loss: 0.8748... 0.1198 sec/batch\n", "Epoch: 20/20... Training Step: 8834... Training loss: 0.8476... 0.1131 sec/batch\n", "Epoch: 20/20... Training Step: 8835... Training loss: 0.8136... 0.1154 sec/batch\n", "Epoch: 20/20... Training Step: 8836... Training loss: 0.8428... 0.1176 sec/batch\n", "Epoch: 20/20... Training Step: 8837... Training loss: 0.9948... 0.1209 sec/batch\n", "Epoch: 20/20... Training Step: 8838... Training loss: 0.8176... 0.1139 sec/batch\n", "Epoch: 20/20... Training Step: 8839... 
Training loss: 1.0069... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 8840... Training loss: 0.8593... 0.1183 sec/batch\n", "Epoch: 20/20... Training Step: 8841... Training loss: 0.8265... 0.1131 sec/batch\n", "Epoch: 20/20... Training Step: 8842... Training loss: 0.8502... 0.1268 sec/batch\n", "Epoch: 20/20... Training Step: 8843... Training loss: 0.9686... 0.1345 sec/batch\n", "Epoch: 20/20... Training Step: 8844... Training loss: 0.8820... 0.1171 sec/batch\n", "Epoch: 20/20... Training Step: 8845... Training loss: 0.9037... 0.1175 sec/batch\n", "Epoch: 20/20... Training Step: 8846... Training loss: 0.9038... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 8847... Training loss: 0.9061... 0.1191 sec/batch\n", "Epoch: 20/20... Training Step: 8848... Training loss: 0.8266... 0.1160 sec/batch\n", "Epoch: 20/20... Training Step: 8849... Training loss: 0.8659... 0.1143 sec/batch\n", "Epoch: 20/20... Training Step: 8850... Training loss: 0.7976... 0.1224 sec/batch\n", "Epoch: 20/20... Training Step: 8851... Training loss: 0.7555... 0.1194 sec/batch\n", "Epoch: 20/20... Training Step: 8852... Training loss: 0.7740... 0.1179 sec/batch\n", "Epoch: 20/20... Training Step: 8853... Training loss: 0.9216... 0.1195 sec/batch\n", "Epoch: 20/20... Training Step: 8854... Training loss: 0.8655... 0.1156 sec/batch\n", "Epoch: 20/20... Training Step: 8855... Training loss: 0.8282... 0.1189 sec/batch\n", "Epoch: 20/20... Training Step: 8856... Training loss: 1.0132... 0.1179 sec/batch\n", "Epoch: 20/20... Training Step: 8857... Training loss: 0.8730... 0.1152 sec/batch\n", "Epoch: 20/20... Training Step: 8858... Training loss: 0.7320... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 8859... Training loss: 1.1051... 0.1289 sec/batch\n", "Epoch: 20/20... Training Step: 8860... Training loss: 0.7503... 0.1258 sec/batch\n", "Epoch: 20/20... Training Step: 8861... Training loss: 0.8582... 0.1179 sec/batch\n", "Epoch: 20/20... Training Step: 8862... 
Training loss: 0.8159... 0.1169 sec/batch\n", "Epoch: 20/20... Training Step: 8863... Training loss: 0.8863... 0.1155 sec/batch\n", "Epoch: 20/20... Training Step: 8864... Training loss: 0.9806... 0.1163 sec/batch\n", "Epoch: 20/20... Training Step: 8865... Training loss: 1.0301... 0.1211 sec/batch\n", "Epoch: 20/20... Training Step: 8866... Training loss: 0.8712... 0.1141 sec/batch\n", "Epoch: 20/20... Training Step: 8867... Training loss: 0.8991... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 8868... Training loss: 0.7802... 0.1168 sec/batch\n", "Epoch: 20/20... Training Step: 8869... Training loss: 0.9684... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 8870... Training loss: 0.7605... 0.1141 sec/batch\n", "Epoch: 20/20... Training Step: 8871... Training loss: 0.9067... 0.1160 sec/batch\n", "Epoch: 20/20... Training Step: 8872... Training loss: 0.8332... 0.1187 sec/batch\n", "Epoch: 20/20... Training Step: 8873... Training loss: 0.8143... 0.1180 sec/batch\n", "Epoch: 20/20... Training Step: 8874... Training loss: 1.0187... 0.1187 sec/batch\n", "Epoch: 20/20... Training Step: 8875... Training loss: 0.7384... 0.1196 sec/batch\n", "Epoch: 20/20... Training Step: 8876... Training loss: 0.7842... 0.1199 sec/batch\n", "Epoch: 20/20... Training Step: 8877... Training loss: 0.7306... 0.1163 sec/batch\n", "Epoch: 20/20... Training Step: 8878... Training loss: 0.9839... 0.1163 sec/batch\n", "Epoch: 20/20... Training Step: 8879... Training loss: 0.8883... 0.1173 sec/batch\n", "Epoch: 20/20... Training Step: 8880... Training loss: 1.0608... 0.1187 sec/batch\n", "Epoch: 20/20... Training Step: 8881... Training loss: 0.7529... 0.1189 sec/batch\n", "Epoch: 20/20... Training Step: 8882... Training loss: 0.9504... 0.1229 sec/batch\n", "Epoch: 20/20... Training Step: 8883... Training loss: 0.9425... 0.1221 sec/batch\n", "Epoch: 20/20... Training Step: 8884... Training loss: 0.9367... 0.1154 sec/batch\n", "Epoch: 20/20... Training Step: 8885... 
Training loss: 0.7165... 0.1208 sec/batch\n", "Epoch: 20/20... Training Step: 8886... Training loss: 0.8613... 0.1175 sec/batch\n", "Epoch: 20/20... Training Step: 8887... Training loss: 1.0065... 0.1179 sec/batch\n", "Epoch: 20/20... Training Step: 8888... Training loss: 0.7723... 0.1197 sec/batch\n", "Epoch: 20/20... Training Step: 8889... Training loss: 0.8223... 0.1185 sec/batch\n", "Epoch: 20/20... Training Step: 8890... Training loss: 0.6290... 0.1131 sec/batch\n", "Epoch: 20/20... Training Step: 8891... Training loss: 1.0759... 0.1154 sec/batch\n", "Epoch: 20/20... Training Step: 8892... Training loss: 0.7993... 0.1140 sec/batch\n", "Epoch: 20/20... Training Step: 8893... Training loss: 0.7310... 0.1145 sec/batch\n", "Epoch: 20/20... Training Step: 8894... Training loss: 0.9518... 0.1179 sec/batch\n", "Epoch: 20/20... Training Step: 8895... Training loss: 0.9445... 0.1132 sec/batch\n", "Epoch: 20/20... Training Step: 8896... Training loss: 0.8700... 0.1199 sec/batch\n", "Epoch: 20/20... Training Step: 8897... Training loss: 1.0118... 0.1145 sec/batch\n", "Epoch: 20/20... Training Step: 8898... Training loss: 0.9731... 0.1158 sec/batch\n", "Epoch: 20/20... Training Step: 8899... Training loss: 0.8536... 0.1167 sec/batch\n", "Epoch: 20/20... Training Step: 8900... Training loss: 0.8950... 0.1189 sec/batch\n", "Epoch: 20/20... Training Step: 8901... Training loss: 0.9437... 0.1188 sec/batch\n", "Epoch: 20/20... Training Step: 8902... Training loss: 1.0797... 0.1211 sec/batch\n", "Epoch: 20/20... Training Step: 8903... Training loss: 0.8976... 0.1198 sec/batch\n", "Epoch: 20/20... Training Step: 8904... Training loss: 0.8999... 0.1206 sec/batch\n", "Epoch: 20/20... Training Step: 8905... Training loss: 1.0418... 0.1176 sec/batch\n", "Epoch: 20/20... Training Step: 8906... Training loss: 0.9332... 0.1169 sec/batch\n", "Epoch: 20/20... Training Step: 8907... Training loss: 1.0459... 0.1130 sec/batch\n", "Epoch: 20/20... Training Step: 8908... 
Training loss: 1.0899... 0.1190 sec/batch\n", "Epoch: 20/20... Training Step: 8909... Training loss: 0.8505... 0.1187 sec/batch\n", "Epoch: 20/20... Training Step: 8910... Training loss: 1.0208... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 8911... Training loss: 0.9588... 0.1227 sec/batch\n", "Epoch: 20/20... Training Step: 8912... Training loss: 0.8852... 0.1204 sec/batch\n", "Epoch: 20/20... Training Step: 8913... Training loss: 1.0525... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 8914... Training loss: 1.0746... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 8915... Training loss: 0.9858... 0.1180 sec/batch\n", "Epoch: 20/20... Training Step: 8916... Training loss: 0.8754... 0.1180 sec/batch\n", "Epoch: 20/20... Training Step: 8917... Training loss: 1.0372... 0.1178 sec/batch\n", "Epoch: 20/20... Training Step: 8918... Training loss: 0.8220... 0.1195 sec/batch\n", "Epoch: 20/20... Training Step: 8919... Training loss: 0.9794... 0.1132 sec/batch\n", "Epoch: 20/20... Training Step: 8920... Training loss: 0.9106... 0.1205 sec/batch\n", "Epoch: 20/20... Training Step: 8921... Training loss: 0.9227... 0.1192 sec/batch\n", "Epoch: 20/20... Training Step: 8922... Training loss: 1.1097... 0.1190 sec/batch\n", "Epoch: 20/20... Training Step: 8923... Training loss: 0.8144... 0.1179 sec/batch\n", "Epoch: 20/20... Training Step: 8924... Training loss: 0.9399... 0.1141 sec/batch\n", "Epoch: 20/20... Training Step: 8925... Training loss: 1.0074... 0.1138 sec/batch\n", "Epoch: 20/20... Training Step: 8926... Training loss: 0.7132... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 8927... Training loss: 0.9699... 0.1200 sec/batch\n", "Epoch: 20/20... Training Step: 8928... Training loss: 0.7882... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 8929... Training loss: 0.9458... 0.1201 sec/batch\n", "Epoch: 20/20... Training Step: 8930... Training loss: 1.0983... 0.1215 sec/batch\n", "Epoch: 20/20... Training Step: 8931... 
Training loss: 0.8896... 0.1201 sec/batch\n", "Epoch: 20/20... Training Step: 8932... Training loss: 0.7918... 0.1152 sec/batch\n", "Epoch: 20/20... Training Step: 8933... Training loss: 0.9632... 0.1158 sec/batch\n", "Epoch: 20/20... Training Step: 8934... Training loss: 0.9932... 0.1174 sec/batch\n", "Epoch: 20/20... Training Step: 8935... Training loss: 1.0321... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 8936... Training loss: 0.7612... 0.1168 sec/batch\n", "Epoch: 20/20... Training Step: 8937... Training loss: 1.0257... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 8938... Training loss: 0.8816... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 8939... Training loss: 1.0040... 0.1204 sec/batch\n", "Epoch: 20/20... Training Step: 8940... Training loss: 0.9266... 0.1206 sec/batch\n", "Epoch: 20/20... Training Step: 8941... Training loss: 0.9272... 0.1186 sec/batch\n", "Epoch: 20/20... Training Step: 8942... Training loss: 0.8968... 0.1139 sec/batch\n", "Epoch: 20/20... Training Step: 8943... Training loss: 0.8247... 0.1134 sec/batch\n", "Epoch: 20/20... Training Step: 8944... Training loss: 1.0753... 0.1100 sec/batch\n", "Epoch: 20/20... Training Step: 8945... Training loss: 0.8436... 0.1133 sec/batch\n", "Epoch: 20/20... Training Step: 8946... Training loss: 0.9316... 0.1225 sec/batch\n", "Epoch: 20/20... Training Step: 8947... Training loss: 0.9805... 0.1190 sec/batch\n", "Epoch: 20/20... Training Step: 8948... Training loss: 0.8653... 0.1171 sec/batch\n", "Epoch: 20/20... Training Step: 8949... Training loss: 0.8466... 0.1186 sec/batch\n", "Epoch: 20/20... Training Step: 8950... Training loss: 0.9421... 0.1197 sec/batch\n", "Epoch: 20/20... Training Step: 8951... Training loss: 0.7982... 0.1128 sec/batch\n", "Epoch: 20/20... Training Step: 8952... Training loss: 0.8027... 0.1160 sec/batch\n", "Epoch: 20/20... Training Step: 8953... Training loss: 0.7150... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 8954... 
Training loss: 0.9663... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 8955... Training loss: 0.8010... 0.1191 sec/batch\n", "Epoch: 20/20... Training Step: 8956... Training loss: 1.0654... 0.1174 sec/batch\n", "Epoch: 20/20... Training Step: 8957... Training loss: 0.7089... 0.1165 sec/batch\n", "Epoch: 20/20... Training Step: 8958... Training loss: 0.7683... 0.1134 sec/batch\n", "Epoch: 20/20... Training Step: 8959... Training loss: 0.8130... 0.1157 sec/batch\n", "Epoch: 20/20... Training Step: 8960... Training loss: 0.8918... 0.1122 sec/batch\n", "Epoch: 20/20... Training Step: 8961... Training loss: 0.9360... 0.1203 sec/batch\n", "Epoch: 20/20... Training Step: 8962... Training loss: 0.7608... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 8963... Training loss: 0.9019... 0.1231 sec/batch\n", "Epoch: 20/20... Training Step: 8964... Training loss: 0.7517... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 8965... Training loss: 0.8221... 0.1180 sec/batch\n", "Epoch: 20/20... Training Step: 8966... Training loss: 0.8391... 0.1187 sec/batch\n", "Epoch: 20/20... Training Step: 8967... Training loss: 0.7844... 0.1140 sec/batch\n", "Epoch: 20/20... Training Step: 8968... Training loss: 1.0134... 0.1152 sec/batch\n", "Epoch: 20/20... Training Step: 8969... Training loss: 0.9644... 0.1176 sec/batch\n", "Epoch: 20/20... Training Step: 8970... Training loss: 0.8928... 0.1212 sec/batch\n", "Epoch: 20/20... Training Step: 8971... Training loss: 0.8457... 0.1171 sec/batch\n", "Epoch: 20/20... Training Step: 8972... Training loss: 0.9593... 0.1174 sec/batch\n", "Epoch: 20/20... Training Step: 8973... Training loss: 0.8496... 0.1214 sec/batch\n", "Epoch: 20/20... Training Step: 8974... Training loss: 0.8228... 0.1195 sec/batch\n", "Epoch: 20/20... Training Step: 8975... Training loss: 0.7430... 0.1168 sec/batch\n", "Epoch: 20/20... Training Step: 8976... Training loss: 0.9144... 0.1174 sec/batch\n", "Epoch: 20/20... Training Step: 8977... 
Training loss: 0.8245... 0.1210 sec/batch\n", "Epoch: 20/20... Training Step: 8978... Training loss: 0.8815... 0.1221 sec/batch\n", "Epoch: 20/20... Training Step: 8979... Training loss: 1.0613... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 8980... Training loss: 0.9469... 0.1210 sec/batch\n", "Epoch: 20/20... Training Step: 8981... Training loss: 0.9437... 0.1194 sec/batch\n", "Epoch: 20/20... Training Step: 8982... Training loss: 0.7776... 0.1173 sec/batch\n", "Epoch: 20/20... Training Step: 8983... Training loss: 0.7832... 0.1162 sec/batch\n", "Epoch: 20/20... Training Step: 8984... Training loss: 0.8391... 0.1196 sec/batch\n", "Epoch: 20/20... Training Step: 8985... Training loss: 0.6496... 0.1165 sec/batch\n", "Epoch: 20/20... Training Step: 8986... Training loss: 0.8317... 0.1165 sec/batch\n", "Epoch: 20/20... Training Step: 8987... Training loss: 1.0071... 0.1171 sec/batch\n", "Epoch: 20/20... Training Step: 8988... Training loss: 0.9279... 0.1160 sec/batch\n", "Epoch: 20/20... Training Step: 8989... Training loss: 0.7862... 0.1204 sec/batch\n", "Epoch: 20/20... Training Step: 8990... Training loss: 0.8126... 0.1193 sec/batch\n", "Epoch: 20/20... Training Step: 8991... Training loss: 0.9678... 0.1147 sec/batch\n", "Epoch: 20/20... Training Step: 8992... Training loss: 0.8604... 0.1148 sec/batch\n", "Epoch: 20/20... Training Step: 8993... Training loss: 0.8688... 0.1164 sec/batch\n", "Epoch: 20/20... Training Step: 8994... Training loss: 0.9994... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 8995... Training loss: 0.7886... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 8996... Training loss: 0.9292... 0.1195 sec/batch\n", "Epoch: 20/20... Training Step: 8997... Training loss: 0.6824... 0.1184 sec/batch\n", "Epoch: 20/20... Training Step: 8998... Training loss: 1.0078... 0.1176 sec/batch\n", "Epoch: 20/20... Training Step: 8999... Training loss: 0.9503... 0.1149 sec/batch\n", "Epoch: 20/20... Training Step: 9000... 
Training loss: 0.9196... 0.1191 sec/batch\n", "Epoch: 20/20... Training Step: 9001... Training loss: 0.9192... 0.1183 sec/batch\n", "Epoch: 20/20... Training Step: 9002... Training loss: 1.0750... 0.1187 sec/batch\n", "Epoch: 20/20... Training Step: 9003... Training loss: 0.9306... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 9004... Training loss: 0.8100... 0.1174 sec/batch\n", "Epoch: 20/20... Training Step: 9005... Training loss: 1.1632... 0.1185 sec/batch\n", "Epoch: 20/20... Training Step: 9006... Training loss: 0.9077... 0.1190 sec/batch\n", "Epoch: 20/20... Training Step: 9007... Training loss: 0.8224... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 9008... Training loss: 0.8393... 0.1125 sec/batch\n", "Epoch: 20/20... Training Step: 9009... Training loss: 0.7627... 0.1145 sec/batch\n", "Epoch: 20/20... Training Step: 9010... Training loss: 0.8851... 0.1089 sec/batch\n", "Epoch: 20/20... Training Step: 9011... Training loss: 0.9932... 0.1207 sec/batch\n", "Epoch: 20/20... Training Step: 9012... Training loss: 0.8448... 0.1153 sec/batch\n", "Epoch: 20/20... Training Step: 9013... Training loss: 0.8176... 0.1219 sec/batch\n", "Epoch: 20/20... Training Step: 9014... Training loss: 0.8369... 0.1165 sec/batch\n", "Epoch: 20/20... Training Step: 9015... Training loss: 0.8002... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9016... Training loss: 0.8148... 0.1175 sec/batch\n", "Epoch: 20/20... Training Step: 9017... Training loss: 0.9777... 0.1215 sec/batch\n", "Epoch: 20/20... Training Step: 9018... Training loss: 1.0977... 0.1207 sec/batch\n", "Epoch: 20/20... Training Step: 9019... Training loss: 0.9244... 0.1208 sec/batch\n", "Epoch: 20/20... Training Step: 9020... Training loss: 0.8794... 0.1138 sec/batch\n", "Epoch: 20/20... Training Step: 9021... Training loss: 0.8934... 0.1165 sec/batch\n", "Epoch: 20/20... Training Step: 9022... Training loss: 0.9645... 0.1143 sec/batch\n", "Epoch: 20/20... Training Step: 9023... 
Training loss: 0.8764... 0.1190 sec/batch\n", "Epoch: 20/20... Training Step: 9024... Training loss: 0.8991... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9025... Training loss: 0.9204... 0.1150 sec/batch\n", "Epoch: 20/20... Training Step: 9026... Training loss: 0.8325... 0.1164 sec/batch\n", "Epoch: 20/20... Training Step: 9027... Training loss: 0.9816... 0.1192 sec/batch\n", "Epoch: 20/20... Training Step: 9028... Training loss: 0.9713... 0.1168 sec/batch\n", "Epoch: 20/20... Training Step: 9029... Training loss: 0.9334... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 9030... Training loss: 0.8470... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 9031... Training loss: 1.1104... 0.1119 sec/batch\n", "Epoch: 20/20... Training Step: 9032... Training loss: 0.8809... 0.1183 sec/batch\n", "Epoch: 20/20... Training Step: 9033... Training loss: 0.9266... 0.1155 sec/batch\n", "Epoch: 20/20... Training Step: 9034... Training loss: 0.9784... 0.1167 sec/batch\n", "Epoch: 20/20... Training Step: 9035... Training loss: 0.8808... 0.1171 sec/batch\n", "Epoch: 20/20... Training Step: 9036... Training loss: 1.0281... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9037... Training loss: 0.8338... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 9038... Training loss: 1.1674... 0.1155 sec/batch\n", "Epoch: 20/20... Training Step: 9039... Training loss: 1.0344... 0.1195 sec/batch\n", "Epoch: 20/20... Training Step: 9040... Training loss: 1.0054... 0.1200 sec/batch\n", "Epoch: 20/20... Training Step: 9041... Training loss: 0.9361... 0.1168 sec/batch\n", "Epoch: 20/20... Training Step: 9042... Training loss: 1.1434... 0.1133 sec/batch\n", "Epoch: 20/20... Training Step: 9043... Training loss: 0.9198... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 9044... Training loss: 0.9942... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 9045... Training loss: 0.9581... 0.1160 sec/batch\n", "Epoch: 20/20... Training Step: 9046... 
Training loss: 0.9332... 0.1274 sec/batch\n", "Epoch: 20/20... Training Step: 9047... Training loss: 0.9967... 0.1257 sec/batch\n", "Epoch: 20/20... Training Step: 9048... Training loss: 0.9245... 0.1157 sec/batch\n", "Epoch: 20/20... Training Step: 9049... Training loss: 1.1653... 0.1201 sec/batch\n", "Epoch: 20/20... Training Step: 9050... Training loss: 1.0346... 0.1197 sec/batch\n", "Epoch: 20/20... Training Step: 9051... Training loss: 1.0542... 0.1158 sec/batch\n", "Epoch: 20/20... Training Step: 9052... Training loss: 0.9176... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 9053... Training loss: 0.8489... 0.1212 sec/batch\n", "Epoch: 20/20... Training Step: 9054... Training loss: 0.7823... 0.1205 sec/batch\n", "Epoch: 20/20... Training Step: 9055... Training loss: 1.0207... 0.1205 sec/batch\n", "Epoch: 20/20... Training Step: 9056... Training loss: 0.8765... 0.1148 sec/batch\n", "Epoch: 20/20... Training Step: 9057... Training loss: 0.8805... 0.1184 sec/batch\n", "Epoch: 20/20... Training Step: 9058... Training loss: 0.8755... 0.1192 sec/batch\n", "Epoch: 20/20... Training Step: 9059... Training loss: 0.9831... 0.1142 sec/batch\n", "Epoch: 20/20... Training Step: 9060... Training loss: 0.8332... 0.1214 sec/batch\n", "Epoch: 20/20... Training Step: 9061... Training loss: 0.9621... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 9062... Training loss: 0.8524... 0.1140 sec/batch\n", "Epoch: 20/20... Training Step: 9063... Training loss: 0.9578... 0.1139 sec/batch\n", "Epoch: 20/20... Training Step: 9064... Training loss: 0.9394... 0.1210 sec/batch\n", "Epoch: 20/20... Training Step: 9065... Training loss: 1.0679... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 9066... Training loss: 0.8760... 0.1193 sec/batch\n", "Epoch: 20/20... Training Step: 9067... Training loss: 0.9247... 0.1196 sec/batch\n", "Epoch: 20/20... Training Step: 9068... Training loss: 0.9506... 0.1180 sec/batch\n", "Epoch: 20/20... Training Step: 9069... 
Training loss: 0.8852... 0.1186 sec/batch\n", "Epoch: 20/20... Training Step: 9070... Training loss: 0.9142... 0.1185 sec/batch\n", "Epoch: 20/20... Training Step: 9071... Training loss: 0.8686... 0.1145 sec/batch\n", "Epoch: 20/20... Training Step: 9072... Training loss: 0.9100... 0.1211 sec/batch\n", "Epoch: 20/20... Training Step: 9073... Training loss: 1.0016... 0.1199 sec/batch\n", "Epoch: 20/20... Training Step: 9074... Training loss: 0.8183... 0.1176 sec/batch\n", "Epoch: 20/20... Training Step: 9075... Training loss: 0.8091... 0.1187 sec/batch\n", "Epoch: 20/20... Training Step: 9076... Training loss: 0.7784... 0.1165 sec/batch\n", "Epoch: 20/20... Training Step: 9077... Training loss: 0.9054... 0.1138 sec/batch\n", "Epoch: 20/20... Training Step: 9078... Training loss: 1.0017... 0.1151 sec/batch\n", "Epoch: 20/20... Training Step: 9079... Training loss: 0.9674... 0.1178 sec/batch\n", "Epoch: 20/20... Training Step: 9080... Training loss: 1.0301... 0.1191 sec/batch\n", "Epoch: 20/20... Training Step: 9081... Training loss: 1.1748... 0.1171 sec/batch\n", "Epoch: 20/20... Training Step: 9082... Training loss: 1.0022... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 9083... Training loss: 0.9662... 0.1175 sec/batch\n", "Epoch: 20/20... Training Step: 9084... Training loss: 0.9975... 0.1147 sec/batch\n", "Epoch: 20/20... Training Step: 9085... Training loss: 0.9101... 0.1140 sec/batch\n", "Epoch: 20/20... Training Step: 9086... Training loss: 0.9773... 0.1227 sec/batch\n", "Epoch: 20/20... Training Step: 9087... Training loss: 0.8906... 0.1247 sec/batch\n", "Epoch: 20/20... Training Step: 9088... Training loss: 0.8837... 0.1171 sec/batch\n", "Epoch: 20/20... Training Step: 9089... Training loss: 1.0338... 0.1165 sec/batch\n", "Epoch: 20/20... Training Step: 9090... Training loss: 1.0483... 0.1209 sec/batch\n", "Epoch: 20/20... Training Step: 9091... Training loss: 0.9335... 0.1163 sec/batch\n", "Epoch: 20/20... Training Step: 9092... 
Training loss: 0.8116... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 9093... Training loss: 0.9218... 0.1195 sec/batch\n", "Epoch: 20/20... Training Step: 9094... Training loss: 1.1158... 0.1193 sec/batch\n", "Epoch: 20/20... Training Step: 9095... Training loss: 0.9219... 0.1176 sec/batch\n", "Epoch: 20/20... Training Step: 9096... Training loss: 0.9410... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 9097... Training loss: 0.7958... 0.1229 sec/batch\n", "Epoch: 20/20... Training Step: 9098... Training loss: 0.8869... 0.1194 sec/batch\n", "Epoch: 20/20... Training Step: 9099... Training loss: 0.9153... 0.1169 sec/batch\n", "Epoch: 20/20... Training Step: 9100... Training loss: 1.0001... 0.1194 sec/batch\n", "Epoch: 20/20... Training Step: 9101... Training loss: 0.8537... 0.1187 sec/batch\n", "Epoch: 20/20... Training Step: 9102... Training loss: 0.9385... 0.1153 sec/batch\n", "Epoch: 20/20... Training Step: 9103... Training loss: 1.0786... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 9104... Training loss: 0.9444... 0.1138 sec/batch\n", "Epoch: 20/20... Training Step: 9105... Training loss: 0.8390... 0.1134 sec/batch\n", "Epoch: 20/20... Training Step: 9106... Training loss: 1.0368... 0.1173 sec/batch\n", "Epoch: 20/20... Training Step: 9107... Training loss: 0.9639... 0.1185 sec/batch\n", "Epoch: 20/20... Training Step: 9108... Training loss: 0.9923... 0.1167 sec/batch\n", "Epoch: 20/20... Training Step: 9109... Training loss: 0.8351... 0.1201 sec/batch\n", "Epoch: 20/20... Training Step: 9110... Training loss: 0.9332... 0.1130 sec/batch\n", "Epoch: 20/20... Training Step: 9111... Training loss: 1.0789... 0.1106 sec/batch\n", "Epoch: 20/20... Training Step: 9112... Training loss: 1.1003... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9113... Training loss: 0.8674... 0.1155 sec/batch\n", "Epoch: 20/20... Training Step: 9114... Training loss: 0.7972... 0.1174 sec/batch\n", "Epoch: 20/20... Training Step: 9115... 
Training loss: 0.9226... 0.1217 sec/batch\n", "Epoch: 20/20... Training Step: 9116... Training loss: 0.8909... 0.1274 sec/batch\n", "Epoch: 20/20... Training Step: 9117... Training loss: 0.9538... 0.1308 sec/batch\n", "Epoch: 20/20... Training Step: 9118... Training loss: 0.9089... 0.1216 sec/batch\n", "Epoch: 20/20... Training Step: 9119... Training loss: 0.8399... 0.1308 sec/batch\n", "Epoch: 20/20... Training Step: 9120... Training loss: 1.2607... 0.1263 sec/batch\n", "Epoch: 20/20... Training Step: 9121... Training loss: 0.8090... 0.1125 sec/batch\n", "Epoch: 20/20... Training Step: 9122... Training loss: 0.9580... 0.1166 sec/batch\n", "Epoch: 20/20... Training Step: 9123... Training loss: 0.8290... 0.1239 sec/batch\n", "Epoch: 20/20... Training Step: 9124... Training loss: 1.0782... 0.1214 sec/batch\n", "Epoch: 20/20... Training Step: 9125... Training loss: 1.1134... 0.1203 sec/batch\n", "Epoch: 20/20... Training Step: 9126... Training loss: 0.9353... 0.1157 sec/batch\n", "Epoch: 20/20... Training Step: 9127... Training loss: 0.8706... 0.1212 sec/batch\n", "Epoch: 20/20... Training Step: 9128... Training loss: 0.7608... 0.1314 sec/batch\n", "Epoch: 20/20... Training Step: 9129... Training loss: 0.8094... 0.1494 sec/batch\n", "Epoch: 20/20... Training Step: 9130... Training loss: 0.8067... 0.1278 sec/batch\n", "Epoch: 20/20... Training Step: 9131... Training loss: 0.8531... 0.1254 sec/batch\n", "Epoch: 20/20... Training Step: 9132... Training loss: 0.8277... 0.1317 sec/batch\n", "Epoch: 20/20... Training Step: 9133... Training loss: 0.7916... 0.1203 sec/batch\n", "Epoch: 20/20... Training Step: 9134... Training loss: 0.8373... 0.1156 sec/batch\n", "Epoch: 20/20... Training Step: 9135... Training loss: 0.6891... 0.1166 sec/batch\n", "Epoch: 20/20... Training Step: 9136... Training loss: 0.7458... 0.1270 sec/batch\n", "Epoch: 20/20... Training Step: 9137... Training loss: 0.7324... 0.1249 sec/batch\n", "Epoch: 20/20... Training Step: 9138... 
Training loss: 1.0459... 0.1278 sec/batch\n", "Epoch: 20/20... Training Step: 9139... Training loss: 0.8258... 0.1252 sec/batch\n", "Epoch: 20/20... Training Step: 9140... Training loss: 0.8109... 0.1242 sec/batch\n", "Epoch: 20/20... Training Step: 9141... Training loss: 0.8508... 0.1227 sec/batch\n", "Epoch: 20/20... Training Step: 9142... Training loss: 0.8534... 0.1210 sec/batch\n", "Epoch: 20/20... Training Step: 9143... Training loss: 0.9070... 0.1242 sec/batch\n", "Epoch: 20/20... Training Step: 9144... Training loss: 0.8348... 0.1233 sec/batch\n", "Epoch: 20/20... Training Step: 9145... Training loss: 0.7611... 0.1239 sec/batch\n", "Epoch: 20/20... Training Step: 9146... Training loss: 0.8233... 0.1243 sec/batch\n", "Epoch: 20/20... Training Step: 9147... Training loss: 0.9967... 0.1277 sec/batch\n", "Epoch: 20/20... Training Step: 9148... Training loss: 1.0010... 0.1205 sec/batch\n", "Epoch: 20/20... Training Step: 9149... Training loss: 0.8395... 0.1287 sec/batch\n", "Epoch: 20/20... Training Step: 9150... Training loss: 0.8898... 0.1257 sec/batch\n", "Epoch: 20/20... Training Step: 9151... Training loss: 0.8111... 0.1186 sec/batch\n", "Epoch: 20/20... Training Step: 9152... Training loss: 0.8784... 0.1283 sec/batch\n", "Epoch: 20/20... Training Step: 9153... Training loss: 0.7651... 0.1228 sec/batch\n", "Epoch: 20/20... Training Step: 9154... Training loss: 0.7204... 0.1277 sec/batch\n", "Epoch: 20/20... Training Step: 9155... Training loss: 0.8882... 0.1260 sec/batch\n", "Epoch: 20/20... Training Step: 9156... Training loss: 0.7722... 0.1225 sec/batch\n", "Epoch: 20/20... Training Step: 9157... Training loss: 0.8136... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 9158... Training loss: 0.9192... 0.1200 sec/batch\n", "Epoch: 20/20... Training Step: 9159... Training loss: 0.7656... 0.1284 sec/batch\n", "Epoch: 20/20... Training Step: 9160... Training loss: 0.8754... 0.1283 sec/batch\n", "Epoch: 20/20... Training Step: 9161... 
Training loss: 0.8530... 0.1264 sec/batch\n", "Epoch: 20/20... Training Step: 9162... Training loss: 0.8523... 0.1206 sec/batch\n", "Epoch: 20/20... Training Step: 9163... Training loss: 0.8422... 0.1254 sec/batch\n", "Epoch: 20/20... Training Step: 9164... Training loss: 0.9419... 0.1249 sec/batch\n", "Epoch: 20/20... Training Step: 9165... Training loss: 0.8304... 0.1235 sec/batch\n", "Epoch: 20/20... Training Step: 9166... Training loss: 0.9151... 0.1234 sec/batch\n", "Epoch: 20/20... Training Step: 9167... Training loss: 1.0094... 0.1217 sec/batch\n", "Epoch: 20/20... Training Step: 9168... Training loss: 0.9293... 0.1205 sec/batch\n", "Epoch: 20/20... Training Step: 9169... Training loss: 0.8099... 0.1269 sec/batch\n", "Epoch: 20/20... Training Step: 9170... Training loss: 0.6800... 0.1221 sec/batch\n", "Epoch: 20/20... Training Step: 9171... Training loss: 0.9715... 0.1273 sec/batch\n", "Epoch: 20/20... Training Step: 9172... Training loss: 0.7667... 0.1244 sec/batch\n", "Epoch: 20/20... Training Step: 9173... Training loss: 0.7911... 0.1243 sec/batch\n", "Epoch: 20/20... Training Step: 9174... Training loss: 0.8325... 0.1271 sec/batch\n", "Epoch: 20/20... Training Step: 9175... Training loss: 0.9590... 0.1193 sec/batch\n", "Epoch: 20/20... Training Step: 9176... Training loss: 0.6770... 0.1139 sec/batch\n", "Epoch: 20/20... Training Step: 9177... Training loss: 1.0022... 0.1191 sec/batch\n", "Epoch: 20/20... Training Step: 9178... Training loss: 0.8403... 0.1164 sec/batch\n", "Epoch: 20/20... Training Step: 9179... Training loss: 0.8026... 0.1150 sec/batch\n", "Epoch: 20/20... Training Step: 9180... Training loss: 1.0784... 0.1169 sec/batch\n", "Epoch: 20/20... Training Step: 9181... Training loss: 0.8979... 0.1183 sec/batch\n", "Epoch: 20/20... Training Step: 9182... Training loss: 0.8947... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 9183... Training loss: 0.8670... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9184... 
Training loss: 0.9312... 0.1210 sec/batch\n", "Epoch: 20/20... Training Step: 9185... Training loss: 0.9310... 0.1216 sec/batch\n", "Epoch: 20/20... Training Step: 9186... Training loss: 0.8900... 0.1186 sec/batch\n", "Epoch: 20/20... Training Step: 9187... Training loss: 0.8245... 0.1218 sec/batch\n", "Epoch: 20/20... Training Step: 9188... Training loss: 0.9285... 0.1167 sec/batch\n", "Epoch: 20/20... Training Step: 9189... Training loss: 0.8415... 0.1220 sec/batch\n", "Epoch: 20/20... Training Step: 9190... Training loss: 0.8847... 0.1155 sec/batch\n", "Epoch: 20/20... Training Step: 9191... Training loss: 0.8504... 0.1202 sec/batch\n", "Epoch: 20/20... Training Step: 9192... Training loss: 1.0037... 0.1150 sec/batch\n", "Epoch: 20/20... Training Step: 9193... Training loss: 0.8610... 0.1163 sec/batch\n", "Epoch: 20/20... Training Step: 9194... Training loss: 0.8698... 0.1194 sec/batch\n", "Epoch: 20/20... Training Step: 9195... Training loss: 0.9475... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 9196... Training loss: 0.9406... 0.1183 sec/batch\n", "Epoch: 20/20... Training Step: 9197... Training loss: 0.8920... 0.1215 sec/batch\n", "Epoch: 20/20... Training Step: 9198... Training loss: 0.9076... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9199... Training loss: 0.8521... 0.1166 sec/batch\n", "Epoch: 20/20... Training Step: 9200... Training loss: 0.8944... 0.1149 sec/batch\n", "Epoch: 20/20... Training Step: 9201... Training loss: 0.6967... 0.1137 sec/batch\n", "Epoch: 20/20... Training Step: 9202... Training loss: 0.8023... 0.1204 sec/batch\n", "Epoch: 20/20... Training Step: 9203... Training loss: 0.8994... 0.1186 sec/batch\n", "Epoch: 20/20... Training Step: 9204... Training loss: 0.7692... 0.1233 sec/batch\n", "Epoch: 20/20... Training Step: 9205... Training loss: 0.6939... 0.1220 sec/batch\n", "Epoch: 20/20... Training Step: 9206... Training loss: 0.8485... 0.1142 sec/batch\n", "Epoch: 20/20... Training Step: 9207... 
Training loss: 0.7875... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 9208... Training loss: 0.9031... 0.1158 sec/batch\n", "Epoch: 20/20... Training Step: 9209... Training loss: 0.9788... 0.1142 sec/batch\n", "Epoch: 20/20... Training Step: 9210... Training loss: 1.0018... 0.1176 sec/batch\n", "Epoch: 20/20... Training Step: 9211... Training loss: 0.8500... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 9212... Training loss: 1.0816... 0.1175 sec/batch\n", "Epoch: 20/20... Training Step: 9213... Training loss: 0.8450... 0.1159 sec/batch\n", "Epoch: 20/20... Training Step: 9214... Training loss: 0.9057... 0.1123 sec/batch\n", "Epoch: 20/20... Training Step: 9215... Training loss: 0.9670... 0.1196 sec/batch\n", "Epoch: 20/20... Training Step: 9216... Training loss: 0.9195... 0.1144 sec/batch\n", "Epoch: 20/20... Training Step: 9217... Training loss: 0.7314... 0.1240 sec/batch\n", "Epoch: 20/20... Training Step: 9218... Training loss: 0.8477... 0.1133 sec/batch\n", "Epoch: 20/20... Training Step: 9219... Training loss: 0.7342... 0.1154 sec/batch\n", "Epoch: 20/20... Training Step: 9220... Training loss: 0.9355... 0.1166 sec/batch\n", "Epoch: 20/20... Training Step: 9221... Training loss: 0.9337... 0.1175 sec/batch\n", "Epoch: 20/20... Training Step: 9222... Training loss: 1.0801... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 9223... Training loss: 0.9220... 0.1194 sec/batch\n", "Epoch: 20/20... Training Step: 9224... Training loss: 1.1191... 0.1160 sec/batch\n", "Epoch: 20/20... Training Step: 9225... Training loss: 0.8040... 0.1190 sec/batch\n", "Epoch: 20/20... Training Step: 9226... Training loss: 0.9374... 0.1133 sec/batch\n", "Epoch: 20/20... Training Step: 9227... Training loss: 0.7240... 0.1201 sec/batch\n", "Epoch: 20/20... Training Step: 9228... Training loss: 0.8963... 0.1194 sec/batch\n", "Epoch: 20/20... Training Step: 9229... Training loss: 0.9193... 0.1185 sec/batch\n", "Epoch: 20/20... Training Step: 9230... 
Training loss: 0.9049... 0.1157 sec/batch\n", "Epoch: 20/20... Training Step: 9231... Training loss: 0.9062... 0.1194 sec/batch\n", "Epoch: 20/20... Training Step: 9232... Training loss: 1.0257... 0.1199 sec/batch\n", "Epoch: 20/20... Training Step: 9233... Training loss: 0.8126... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9234... Training loss: 0.7696... 0.1148 sec/batch\n", "Epoch: 20/20... Training Step: 9235... Training loss: 0.8442... 0.1285 sec/batch\n", "Epoch: 20/20... Training Step: 9236... Training loss: 0.7972... 0.1297 sec/batch\n", "Epoch: 20/20... Training Step: 9237... Training loss: 0.9349... 0.1242 sec/batch\n", "Epoch: 20/20... Training Step: 9238... Training loss: 0.8360... 0.1222 sec/batch\n", "Epoch: 20/20... Training Step: 9239... Training loss: 0.8206... 0.1214 sec/batch\n", "Epoch: 20/20... Training Step: 9240... Training loss: 0.9688... 0.1175 sec/batch\n", "Epoch: 20/20... Training Step: 9241... Training loss: 0.8671... 0.1229 sec/batch\n", "Epoch: 20/20... Training Step: 9242... Training loss: 0.8892... 0.1220 sec/batch\n", "Epoch: 20/20... Training Step: 9243... Training loss: 0.8687... 0.1170 sec/batch\n", "Epoch: 20/20... Training Step: 9244... Training loss: 0.9668... 0.1178 sec/batch\n", "Epoch: 20/20... Training Step: 9245... Training loss: 0.7701... 0.1207 sec/batch\n", "Epoch: 20/20... Training Step: 9246... Training loss: 0.8911... 0.1278 sec/batch\n", "Epoch: 20/20... Training Step: 9247... Training loss: 0.9587... 0.1214 sec/batch\n", "Epoch: 20/20... Training Step: 9248... Training loss: 0.8099... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 9249... Training loss: 1.0678... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9250... Training loss: 0.8974... 0.1172 sec/batch\n", "Epoch: 20/20... Training Step: 9251... Training loss: 0.9147... 0.1152 sec/batch\n", "Epoch: 20/20... Training Step: 9252... Training loss: 1.0028... 0.1169 sec/batch\n", "Epoch: 20/20... Training Step: 9253... 
Training loss: 0.8413... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 9254... Training loss: 0.8912... 0.1136 sec/batch\n", "Epoch: 20/20... Training Step: 9255... Training loss: 1.0755... 0.1168 sec/batch\n", "Epoch: 20/20... Training Step: 9256... Training loss: 1.0946... 0.1162 sec/batch\n", "Epoch: 20/20... Training Step: 9257... Training loss: 0.7189... 0.1166 sec/batch\n", "Epoch: 20/20... Training Step: 9258... Training loss: 0.8001... 0.1200 sec/batch\n", "Epoch: 20/20... Training Step: 9259... Training loss: 0.9301... 0.1205 sec/batch\n", "Epoch: 20/20... Training Step: 9260... Training loss: 0.9401... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 9261... Training loss: 0.8072... 0.1201 sec/batch\n", "Epoch: 20/20... Training Step: 9262... Training loss: 0.8136... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 9263... Training loss: 0.8872... 0.1165 sec/batch\n", "Epoch: 20/20... Training Step: 9264... Training loss: 0.8398... 0.1177 sec/batch\n", "Epoch: 20/20... Training Step: 9265... Training loss: 0.8970... 0.1162 sec/batch\n", "Epoch: 20/20... Training Step: 9266... Training loss: 0.8698... 0.1164 sec/batch\n", "Epoch: 20/20... Training Step: 9267... Training loss: 0.8412... 0.1169 sec/batch\n", "Epoch: 20/20... Training Step: 9268... Training loss: 0.8722... 0.1204 sec/batch\n", "Epoch: 20/20... Training Step: 9269... Training loss: 0.8469... 0.1127 sec/batch\n", "Epoch: 20/20... Training Step: 9270... Training loss: 0.9399... 0.1190 sec/batch\n", "Epoch: 20/20... Training Step: 9271... Training loss: 0.7496... 0.1161 sec/batch\n", "Epoch: 20/20... Training Step: 9272... Training loss: 0.7365... 0.1182 sec/batch\n", "Epoch: 20/20... Training Step: 9273... Training loss: 0.8525... 0.1149 sec/batch\n", "Epoch: 20/20... Training Step: 9274... Training loss: 0.7399... 0.1126 sec/batch\n", "Epoch: 20/20... Training Step: 9275... Training loss: 0.9084... 0.1162 sec/batch\n", "Epoch: 20/20... Training Step: 9276... 
Training loss: 0.8220... 0.1133 sec/batch\n", "Epoch: 20/20... Training Step: 9277... Training loss: 0.9242... 0.1167 sec/batch\n", "Epoch: 20/20... Training Step: 9278... Training loss: 0.7732... 0.1181 sec/batch\n", "Epoch: 20/20... Training Step: 9279... Training loss: 0.9238... 0.1149 sec/batch\n", "Epoch: 20/20... Training Step: 9280... Training loss: 0.8466... 0.1156 sec/batch\n" ] } ], "source": [ "epochs = 20\n", "# Save every N iterations\n", "save_every_n = 200\n", "\n", "model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,\n", " lstm_size=lstm_size, num_layers=num_layers, \n", " learning_rate=learning_rate)\n", "\n", "saver = tf.train.Saver(max_to_keep=100)\n", "with tf.Session() as sess:\n", " sess.run(tf.global_variables_initializer())\n", " \n", " # Use the line below to load a checkpoint and resume training\n", " #saver.restore(sess, 'checkpoints/______.ckpt')\n", " counter = 0\n", " for e in range(epochs):\n", " # Train network\n", " new_state = sess.run(model.initial_state)\n", " loss = 0\n", " for x, y in get_batches(encoded, batch_size, num_steps):\n", " counter += 1\n", " start = time.time()\n", " feed = {model.inputs: x,\n", " model.targets: y,\n", " model.keep_prob: keep_prob,\n", " model.initial_state: new_state}\n", " batch_loss, new_state, _ = sess.run([model.loss, \n", " model.final_state, \n", " model.optimizer], \n", " feed_dict=feed)\n", " \n", " end = time.time()\n", " print('Epoch: {}/{}... '.format(e+1, epochs),\n", " 'Training Step: {}... '.format(counter),\n", " 'Training loss: {:.4f}... 
'.format(batch_loss),\n", " '{:.4f} sec/batch'.format((end-start)))\n", " \n", " if (counter % save_every_n == 0):\n", " saver.save(sess, \"checkpoints/i{}_l{}.ckpt\".format(counter, lstm_size))\n", " \n", " saver.save(sess, \"checkpoints/i{}_l{}.ckpt\".format(counter, lstm_size))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "deletable": true, "editable": true, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "model_checkpoint_path: \"checkpoints/i9280_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i400_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i1000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i1200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i1400_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i1600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i1800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i2000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i2200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i2400_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i2600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i2800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i3000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i3200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i3400_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i3600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i3800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i4000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i4200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i4400_l512.ckpt\"\n", "all_model_checkpoint_paths: 
\"checkpoints/i4600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i4800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i5000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i5200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i5400_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i5600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i5800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i6000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i6200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i6400_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i6600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i6800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i7000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i7200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i7400_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i7600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i7800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i8000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i8200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i8400_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i8600_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i8800_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i9000_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i9200_l512.ckpt\"\n", "all_model_checkpoint_paths: \"checkpoints/i9280_l512.ckpt\"" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.train.get_checkpoint_state('checkpoints')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Sampling" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def 
pick_top_n(preds, vocab_size, top_n=5):\n", " p = np.squeeze(preds)\n", " p[np.argsort(p)[:-top_n]] = 0\n", " p = p / np.sum(p)\n", " c = np.random.choice(vocab_size, 1, p=p)[0]\n", " return c" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def sample(checkpoint, n_samples, lstm_size, vocab_size, prime=\"@\\n\"):\n", " samples = [c for c in prime]\n", " model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)\n", " saver = tf.train.Saver()\n", " with tf.Session() as sess:\n", " saver.restore(sess, checkpoint)\n", " new_state = sess.run(model.initial_state)\n", " for c in prime:\n", " x = np.zeros((1, 1))\n", " x[0,0] = vocab_to_int[c]\n", " feed = {model.inputs: x,\n", " model.keep_prob: 1.,\n", " model.initial_state: new_state}\n", " preds, new_state = sess.run([model.prediction, model.final_state], \n", " feed_dict=feed)\n", "\n", " c = pick_top_n(preds, len(vocab))\n", " samples.append(int_to_vocab[c])\n", "\n", " for i in range(n_samples):\n", " x[0,0] = c\n", " feed = {model.inputs: x,\n", " model.keep_prob: 1.,\n", " model.initial_state: new_state}\n", " preds, new_state = sess.run([model.prediction, model.final_state], \n", " feed_dict=feed)\n", "\n", " c = pick_top_n(preds, len(vocab))\n", " samples.append(int_to_vocab[c])\n", " \n", " return ''.join(samples)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "data": { "text/plain": [ "'checkpoints/i9280_l512.ckpt'" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.train.latest_checkpoint('checkpoints')" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "checkpoint = tf.train.latest_checkpoint('checkpoints')\n", "samp = sample(checkpoint, 5000, lstm_size, 
len(vocab), prime=\"@\\n\")" ] }, { "cell_type": "code", "execution_count": 87, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "r = []\n", "\n", "r.append(\"**kern\\t**kern\\n\")\n", "r.append(\"*staff2\\t*staff1\\n\")\n", "r.append(\"*clefF4\\t*clefG2\\n\")\n", "r.append(\"*k[]\\t*k[]\\n\")\n", "r.append(\"*C:\\t*C:\\n\")\n", "r.append(\"*M4/4\\t*M4/4\\n\")\n", "r.append(\"*MM80\\t*MM80\\n\")\n", "\n", "bar = 1\n", "for line in samp.splitlines():\n", " sp = line.split('\\t')\n", " if sp[0] == '@':\n", " r.append(\"={bar}\\t={bar}\\n\".format(bar=bar))\n", " bar += 1\n", " else:\n", " ln = len(sp)\n", " if ln == 1 and sp[0] != \"\":\n", " r.append(sp[0])\n", " r.append('\\t')\n", " r.append('.')\n", " r.append('\\n')\n", " elif ln == 1 and sp[0] == \"\":\n", " r.append(\".\")\n", " r.append('\\t')\n", " r.append('.')\n", " r.append('\\n')\n", " elif sp[0] == \"*-\" or sp[1] == \"*-\":\n", " continue\n", " else:\n", " r.append(sp[0])\n", " r.append('\\t')\n", " r.append(sp[1])\n", " r.append('\\n')\n", "\n", "r.append(\"==|!\\t==|!\\n\")\n", "r.append(\"*-\\t*-\\n\")\n", "\n", "open(\"results/bach2ai.krn\",\"w\").writelines(r)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "humdrum.spineParser: WARNING: Error in parsing event ('#') at position 33 for spine None: Could not parse # for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('32') at position 75 for spine None: Could not parse 32 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('32') at position 79 for spine None: Could not parse 32 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 86 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at 
position 120 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('116#') at position 141 for spine None: Could not parse 116# for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 152 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 155 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 184 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 217 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 231 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 239 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('8') at position 298 for spine None: Could not parse 8 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 336 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('8.') at position 374 for spine None: Could not parse 8. for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16.') at position 389 for spine None: Could not parse 16. 
for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 407 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 430 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('#') at position 448 for spine None: Could not parse # for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1616.') at position 456 for spine None: Could not parse 1616. for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('.##') at position 461 for spine None: Could not parse .## for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('116#') at position 477 for spine None: Could not parse 116# for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 480 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 498 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 500 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 511 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 525 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('W') at position 527 for spine None: Could not parse W for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 537 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('#') at position 538 for spine None: Could not parse # for note information\n", "humdrum.spineParser: WARNING: Error in parsing event 
('16') at position 562 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('.#') at position 614 for spine None: Could not parse .# for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 649 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('{166') at position 28 for spine None: Could not parse {166 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 43 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('#J#') at position 54 for spine None: Could not parse #J# for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('#') at position 67 for spine None: Could not parse # for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 95 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 126 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('116') at position 131 for spine None: Could not parse 116 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 145 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('161') at position 162 for spine None: Could not parse 161 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 209 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 211 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('8') at position 238 for spine None: Could not parse 8 for note information\n", 
"humdrum.spineParser: WARNING: Error in parsing event ('{') at position 278 for spine None: Could not parse { for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16.#}') at position 281 for spine None: Could not parse 16.#} for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 328 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 344 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('8') at position 357 for spine None: Could not parse 8 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 377 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('.6@') at position 389 for spine None: Could not parse .6@ for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 391 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 394 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 400 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 416 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('8.') at position 417 for spine None: Could not parse 8. 
for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('{') at position 447 for spine None: Could not parse { for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('{1') at position 462 for spine None: Could not parse {1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 468 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 490 for spine None: Could not parse 16 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('8.M') at position 526 for spine None: Could not parse 8.M for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1616') at position 530 for spine None: Could not parse 1616 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 532 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1616#') at position 601 for spine None: Could not parse 1616# for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1616') at position 604 for spine None: Could not parse 1616 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('1') at position 630 for spine None: Could not parse 1 for note information\n", "humdrum.spineParser: WARNING: Error in parsing event ('16') at position 648 for spine None: Could not parse 16 for note information\n" ] }, { "data": { "text/plain": [ "'midi/bach2ai.mid'" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from music21 import *\n", "m1 = converter.parse(\"results/bach2ai.krn\")\n", "m1.write('midi', fp='midi/bach2ai.mid')" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ], "text/plain": [ "