INFO:gluonnlp:19:42:58 Namespace(accumulate=None, batch_size=12, bert_dataset='book_corpus_wiki_en_uncased', bert_model='bert_12_768_12', doc_stride=128, epochs=2, export=True, gpu='0', input_size=768, log_interval=50, lr=3e-05, max_answer_length=30, max_query_length=64, max_seq_length=384, model_parameters=None, n_best_size=20, null_score_diff_threshold=0.0, only_predict=False, optimizer='adam', output_dir='output_dir0', pretrained_bert_parameters=None, seq_length=384, test_batch_size=24, uncased=True, version_2=False, warmup_ratio=0.1) INFO:gluonnlp:19:43:02 Loader Train data... INFO:gluonnlp:19:43:03 Number of records in Train data:87599 INFO:gluonnlp:19:44:03 The number of examples after preprocessing:88641 INFO:gluonnlp:19:44:03 Start Training INFO:gluonnlp:19:44:22 Epoch: 0, Batch: 49/7387, Loss=5.9289, lr=0.0000010 Time cost=18.4 Thoughput=32.67 samples/s INFO:gluonnlp:19:44:40 Epoch: 0, Batch: 99/7387, Loss=5.7155, lr=0.0000020 Time cost=18.0 Thoughput=33.39 samples/s INFO:gluonnlp:19:44:58 Epoch: 0, Batch: 149/7387, Loss=5.3770, lr=0.0000030 Time cost=18.0 Thoughput=33.33 samples/s INFO:gluonnlp:19:45:16 Epoch: 0, Batch: 199/7387, Loss=4.9678, lr=0.0000041 Time cost=18.0 Thoughput=33.30 samples/s INFO:gluonnlp:19:45:34 Epoch: 0, Batch: 249/7387, Loss=4.5589, lr=0.0000051 Time cost=18.0 Thoughput=33.30 samples/s INFO:gluonnlp:19:45:52 Epoch: 0, Batch: 299/7387, Loss=4.2786, lr=0.0000061 Time cost=18.1 Thoughput=33.24 samples/s INFO:gluonnlp:19:46:10 Epoch: 0, Batch: 349/7387, Loss=3.9108, lr=0.0000071 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:19:46:28 Epoch: 0, Batch: 399/7387, Loss=3.4292, lr=0.0000081 Time cost=18.1 Thoughput=33.24 samples/s INFO:gluonnlp:19:46:46 Epoch: 0, Batch: 449/7387, Loss=3.2122, lr=0.0000091 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:19:47:04 Epoch: 0, Batch: 499/7387, Loss=2.8913, lr=0.0000102 Time cost=18.1 Thoughput=33.23 samples/s INFO:gluonnlp:19:47:22 Epoch: 0, Batch: 549/7387, Loss=2.8119, lr=0.0000112 Time cost=18.0 Thoughput=33.27 samples/s INFO:gluonnlp:19:47:40 Epoch: 0, Batch: 599/7387, Loss=2.6980, lr=0.0000122 Time cost=18.0 Thoughput=33.26 samples/s INFO:gluonnlp:19:47:58 Epoch: 0, Batch: 649/7387, Loss=2.4626, lr=0.0000132 Time cost=18.1 Thoughput=33.23 samples/s INFO:gluonnlp:19:48:16 Epoch: 0, Batch: 699/7387, Loss=2.2634, lr=0.0000142 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:19:48:34 Epoch: 0, Batch: 749/7387, Loss=2.1627, lr=0.0000152 Time cost=18.0 Thoughput=33.25 samples/s INFO:gluonnlp:19:48:52 Epoch: 0, Batch: 799/7387, Loss=2.1155, lr=0.0000162 Time cost=18.0 Thoughput=33.26 samples/s INFO:gluonnlp:19:49:10 Epoch: 0, Batch: 849/7387, Loss=2.1415, lr=0.0000173 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:19:49:28 Epoch: 0, Batch: 899/7387, Loss=2.0238, lr=0.0000183 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:19:49:46 Epoch: 0, Batch: 949/7387, Loss=2.0373, lr=0.0000193 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:19:50:05 Epoch: 0, Batch: 999/7387, Loss=1.7648, lr=0.0000203 Time cost=18.2 Thoughput=32.99 samples/s INFO:gluonnlp:19:50:23 Epoch: 0, Batch: 1049/7387, Loss=1.8969, lr=0.0000213 Time cost=18.4 Thoughput=32.53 samples/s INFO:gluonnlp:19:50:42 Epoch: 0, Batch: 1099/7387, Loss=1.7603, lr=0.0000223 Time cost=18.8 Thoughput=31.90 samples/s INFO:gluonnlp:19:51:01 Epoch: 0, Batch: 1149/7387, Loss=1.7044, lr=0.0000234 Time cost=19.0 Thoughput=31.57 samples/s INFO:gluonnlp:19:51:19 Epoch: 0, Batch: 1199/7387, Loss=1.7601, lr=0.0000244 Time cost=18.5 Thoughput=32.45 samples/s INFO:gluonnlp:19:51:38 Epoch: 0, Batch: 1249/7387, Loss=1.7663, lr=0.0000254 Time cost=18.4 Thoughput=32.55 samples/s INFO:gluonnlp:19:51:56 Epoch: 0, Batch: 1299/7387, Loss=1.6277, lr=0.0000264 Time cost=18.4 Thoughput=32.53 samples/s INFO:gluonnlp:19:52:15 Epoch: 0, Batch: 1349/7387, Loss=1.6895, lr=0.0000274 Time cost=18.3 Thoughput=32.73 samples/s INFO:gluonnlp:19:52:33 Epoch: 0, Batch: 1399/7387, Loss=1.6101, lr=0.0000284 Time cost=18.4 Thoughput=32.56 samples/s INFO:gluonnlp:19:52:51 Epoch: 0, Batch: 1449/7387, Loss=1.6225, lr=0.0000295 Time cost=18.2 Thoughput=32.90 samples/s INFO:gluonnlp:19:53:10 Epoch: 0, Batch: 1499/7387, Loss=1.5752, lr=0.0000299 Time cost=18.3 Thoughput=32.78 samples/s INFO:gluonnlp:19:53:28 Epoch: 0, Batch: 1549/7387, Loss=1.6401, lr=0.0000298 Time cost=18.1 Thoughput=33.10 samples/s INFO:gluonnlp:19:53:46 Epoch: 0, Batch: 1599/7387, Loss=1.4705, lr=0.0000297 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:19:54:04 Epoch: 0, Batch: 1649/7387, Loss=1.5361, lr=0.0000296 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:19:54:22 Epoch: 0, Batch: 1699/7387, Loss=1.4797, lr=0.0000295 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:19:54:40 Epoch: 0, Batch: 1749/7387, Loss=1.3974, lr=0.0000294 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:19:54:58 Epoch: 0, Batch: 1799/7387, Loss=1.3992, lr=0.0000293 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:19:55:16 Epoch: 0, Batch: 1849/7387, Loss=1.4179, lr=0.0000292 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:19:55:34 Epoch: 0, Batch: 1899/7387, Loss=1.4514, lr=0.0000290 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:19:55:53 Epoch: 0, Batch: 1949/7387, Loss=1.4820, lr=0.0000289 Time cost=18.1 Thoughput=33.06 samples/s INFO:gluonnlp:19:56:11 Epoch: 0, Batch: 1999/7387, Loss=1.3428, lr=0.0000288 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:19:56:29 Epoch: 0, Batch: 2049/7387, Loss=1.4235, lr=0.0000287 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:19:56:47 Epoch: 0, Batch: 2099/7387, Loss=1.5044, lr=0.0000286 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:19:57:05 Epoch: 0, Batch: 2149/7387, Loss=1.5193, lr=0.0000285 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:19:57:23 Epoch: 0, Batch: 2199/7387, Loss=1.3787, lr=0.0000284 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:19:57:42 Epoch: 0, Batch: 2249/7387, Loss=1.2426, lr=0.0000283 Time cost=18.6 Thoughput=32.29 samples/s INFO:gluonnlp:19:58:00 Epoch: 0, Batch: 2299/7387, Loss=1.4248, lr=0.0000281 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:19:58:18 Epoch: 0, Batch: 2349/7387, Loss=1.3720, lr=0.0000280 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:19:58:36 Epoch: 0, Batch: 2399/7387, Loss=1.4423, lr=0.0000279 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:19:58:54 Epoch: 0, Batch: 2449/7387, Loss=1.3356, lr=0.0000278 Time cost=18.1 Thoughput=33.09 samples/s INFO:gluonnlp:19:59:12 Epoch: 0, Batch: 2499/7387, Loss=1.2451, lr=0.0000277 Time cost=18.1 Thoughput=33.11 samples/s INFO:gluonnlp:19:59:30 Epoch: 0, Batch: 2549/7387, Loss=1.4012, lr=0.0000276 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:19:59:48 Epoch: 0, Batch: 2599/7387, Loss=1.2729, lr=0.0000275 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:00:06 Epoch: 0, Batch: 2649/7387, Loss=1.2177, lr=0.0000274 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:00:25 Epoch: 0, Batch: 2699/7387, Loss=1.3891, lr=0.0000272 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:00:43 Epoch: 0, Batch: 2749/7387, Loss=1.2860, lr=0.0000271 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:01:01 Epoch: 0, Batch: 2799/7387, Loss=1.2637, lr=0.0000270 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:01:19 Epoch: 0, Batch: 2849/7387, Loss=1.2540, lr=0.0000269 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:01:37 Epoch: 0, Batch: 2899/7387, Loss=1.3573, lr=0.0000268 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:01:55 Epoch: 0, Batch: 2949/7387, Loss=1.2420, lr=0.0000267 Time cost=18.2 Thoughput=32.98 samples/s INFO:gluonnlp:20:02:13 Epoch: 0, Batch: 2999/7387, Loss=1.3263, lr=0.0000266 Time cost=18.3 Thoughput=32.77 samples/s INFO:gluonnlp:20:02:32 Epoch: 0, Batch: 3049/7387, Loss=1.2306, lr=0.0000265 Time cost=18.2 Thoughput=32.92 samples/s INFO:gluonnlp:20:02:50 Epoch: 0, Batch: 3099/7387, Loss=1.2392, lr=0.0000263 Time cost=18.2 Thoughput=32.92 samples/s INFO:gluonnlp:20:03:08 Epoch: 0, Batch: 3149/7387, Loss=1.2888, lr=0.0000262 Time cost=18.2 Thoughput=33.00 samples/s INFO:gluonnlp:20:03:27 Epoch: 0, Batch: 3199/7387, Loss=1.2213, lr=0.0000261 Time cost=18.4 Thoughput=32.53 samples/s INFO:gluonnlp:20:03:45 Epoch: 0, Batch: 3249/7387, Loss=1.2344, lr=0.0000260 Time cost=18.3 Thoughput=32.72 samples/s INFO:gluonnlp:20:04:03 Epoch: 0, Batch: 3299/7387, Loss=1.2456, lr=0.0000259 Time cost=18.5 Thoughput=32.46 samples/s INFO:gluonnlp:20:04:21 Epoch: 0, Batch: 3349/7387, Loss=1.1411, lr=0.0000258 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:04:40 Epoch: 0, Batch: 3399/7387, Loss=1.1903, lr=0.0000257 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:20:04:58 Epoch: 0, Batch: 3449/7387, Loss=1.2335, lr=0.0000255 Time cost=18.3 Thoughput=32.70 samples/s INFO:gluonnlp:20:05:16 Epoch: 0, Batch: 3499/7387, Loss=1.2344, lr=0.0000254 Time cost=18.3 Thoughput=32.78 samples/s INFO:gluonnlp:20:05:34 Epoch: 0, Batch: 3549/7387, Loss=1.1899, lr=0.0000253 Time cost=18.2 Thoughput=32.96 samples/s INFO:gluonnlp:20:05:53 Epoch: 0, Batch: 3599/7387, Loss=1.2425, lr=0.0000252 Time cost=18.3 Thoughput=32.81 samples/s INFO:gluonnlp:20:06:11 Epoch: 0, Batch: 3649/7387, Loss=1.3050, lr=0.0000251 Time cost=18.3 Thoughput=32.72 samples/s INFO:gluonnlp:20:06:29 Epoch: 0, Batch: 3699/7387, Loss=1.2566, lr=0.0000250 Time cost=18.3 Thoughput=32.77 samples/s INFO:gluonnlp:20:06:48 Epoch: 0, Batch: 3749/7387, Loss=1.2512, lr=0.0000249 Time cost=18.2 Thoughput=32.96 samples/s INFO:gluonnlp:20:07:06 Epoch: 0, Batch: 3799/7387, Loss=1.1578, lr=0.0000248 Time cost=18.2 Thoughput=32.96 samples/s INFO:gluonnlp:20:07:24 Epoch: 0, Batch: 3849/7387, Loss=1.2755, lr=0.0000246 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:07:42 Epoch: 0, Batch: 3899/7387, Loss=1.2368, lr=0.0000245 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:08:00 Epoch: 0, Batch: 3949/7387, Loss=1.3244, lr=0.0000244 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:08:18 Epoch: 0, Batch: 3999/7387, Loss=1.1825, lr=0.0000243 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:08:36 Epoch: 0, Batch: 4049/7387, Loss=1.1908, lr=0.0000242 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:08:54 Epoch: 0, Batch: 4099/7387, Loss=1.1894, lr=0.0000241 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:09:12 Epoch: 0, Batch: 4149/7387, Loss=1.1597, lr=0.0000240 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:20:09:31 Epoch: 0, Batch: 4199/7387, Loss=1.1972, lr=0.0000239 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:20:09:49 Epoch: 0, Batch: 4249/7387, Loss=1.2807, lr=0.0000237 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:10:07 Epoch: 0, Batch: 4299/7387, Loss=1.2483, lr=0.0000236 Time cost=18.1 Thoughput=33.09 samples/s INFO:gluonnlp:20:10:25 Epoch: 0, Batch: 4349/7387, Loss=1.2773, lr=0.0000235 Time cost=18.1 Thoughput=33.07 samples/s INFO:gluonnlp:20:10:43 Epoch: 0, Batch: 4399/7387, Loss=1.1814, lr=0.0000234 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:11:01 Epoch: 0, Batch: 4449/7387, Loss=1.1967, lr=0.0000233 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:11:19 Epoch: 0, Batch: 4499/7387, Loss=1.0206, lr=0.0000232 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:11:37 Epoch: 0, Batch: 4549/7387, Loss=1.1321, lr=0.0000231 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:11:55 Epoch: 0, Batch: 4599/7387, Loss=1.1645, lr=0.0000230 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:12:13 Epoch: 0, Batch: 4649/7387, Loss=1.2104, lr=0.0000228 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:12:32 Epoch: 0, Batch: 4699/7387, Loss=1.0559, lr=0.0000227 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:12:50 Epoch: 0, Batch: 4749/7387, Loss=1.2201, lr=0.0000226 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:13:08 Epoch: 0, Batch: 4799/7387, Loss=1.1218, lr=0.0000225 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:13:26 Epoch: 0, Batch: 4849/7387, Loss=1.1793, lr=0.0000224 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:13:44 Epoch: 0, Batch: 4899/7387, Loss=1.1502, lr=0.0000223 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:14:02 Epoch: 0, Batch: 4949/7387, Loss=1.1864, lr=0.0000222 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:14:20 Epoch: 0, Batch: 4999/7387, Loss=1.2069, lr=0.0000221 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:20:14:38 Epoch: 0, Batch: 5049/7387, Loss=1.1521, lr=0.0000219 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:14:56 Epoch: 0, Batch: 5099/7387, Loss=1.1584, lr=0.0000218 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:15:15 Epoch: 0, Batch: 5149/7387, Loss=1.1643, lr=0.0000217 Time cost=18.3 Thoughput=32.83 samples/s INFO:gluonnlp:20:15:33 Epoch: 0, Batch: 5199/7387, Loss=1.1291, lr=0.0000216 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:15:51 Epoch: 0, Batch: 5249/7387, Loss=1.1331, lr=0.0000215 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:16:09 Epoch: 0, Batch: 5299/7387, Loss=1.1299, lr=0.0000214 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:16:27 Epoch: 0, Batch: 5349/7387, Loss=1.1668, lr=0.0000213 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:16:45 Epoch: 0, Batch: 5399/7387, Loss=1.0694, lr=0.0000211 Time cost=18.1 Thoughput=33.23 samples/s INFO:gluonnlp:20:17:03 Epoch: 0, Batch: 5449/7387, Loss=1.1167, lr=0.0000210 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:17:21 Epoch: 0, Batch: 5499/7387, Loss=1.1501, lr=0.0000209 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:17:39 Epoch: 0, Batch: 5549/7387, Loss=1.1478, lr=0.0000208 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:17:57 Epoch: 0, Batch: 5599/7387, Loss=1.2027, lr=0.0000207 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:18:15 Epoch: 0, Batch: 5649/7387, Loss=1.0468, lr=0.0000206 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:18:33 Epoch: 0, Batch: 5699/7387, Loss=1.1525, lr=0.0000205 Time cost=18.1 Thoughput=33.11 samples/s INFO:gluonnlp:20:18:52 Epoch: 0, Batch: 5749/7387, Loss=1.0205, lr=0.0000204 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:19:10 Epoch: 0, Batch: 5799/7387, Loss=1.0077, lr=0.0000202 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:19:28 Epoch: 0, Batch: 5849/7387, Loss=1.1348, lr=0.0000201 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:19:46 Epoch: 0, Batch: 5899/7387, Loss=1.2047, lr=0.0000200 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:20:04 Epoch: 0, Batch: 5949/7387, Loss=1.0690, lr=0.0000199 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:20:22 Epoch: 0, Batch: 5999/7387, Loss=1.0342, lr=0.0000198 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:20:40 Epoch: 0, Batch: 6049/7387, Loss=1.0393, lr=0.0000197 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:20:58 Epoch: 0, Batch: 6099/7387, Loss=1.0300, lr=0.0000196 Time cost=18.1 Thoughput=33.24 samples/s INFO:gluonnlp:20:21:16 Epoch: 0, Batch: 6149/7387, Loss=1.0560, lr=0.0000195 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:21:34 Epoch: 0, Batch: 6199/7387, Loss=1.1179, lr=0.0000193 Time cost=18.1 Thoughput=33.24 samples/s INFO:gluonnlp:20:21:52 Epoch: 0, Batch: 6249/7387, Loss=1.0393, lr=0.0000192 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:22:10 Epoch: 0, Batch: 6299/7387, Loss=1.0732, lr=0.0000191 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:22:28 Epoch: 0, Batch: 6349/7387, Loss=1.0800, lr=0.0000190 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:22:47 Epoch: 0, Batch: 6399/7387, Loss=1.1053, lr=0.0000189 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:23:05 Epoch: 0, Batch: 6449/7387, Loss=1.1098, lr=0.0000188 Time cost=18.1 Thoughput=33.24 samples/s INFO:gluonnlp:20:23:23 Epoch: 0, Batch: 6499/7387, Loss=1.0512, lr=0.0000187 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:23:41 Epoch: 0, Batch: 6549/7387, Loss=1.0820, lr=0.0000186 Time cost=18.1 Thoughput=33.10 samples/s INFO:gluonnlp:20:23:59 Epoch: 0, Batch: 6599/7387, Loss=1.1115, lr=0.0000184 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:24:17 Epoch: 0, Batch: 6649/7387, Loss=1.1491, lr=0.0000183 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:24:35 Epoch: 0, Batch: 6699/7387, Loss=1.1318, lr=0.0000182 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:24:53 Epoch: 0, Batch: 6749/7387, Loss=1.0404, lr=0.0000181 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:25:11 Epoch: 0, Batch: 6799/7387, Loss=1.2041, lr=0.0000180 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:25:29 Epoch: 0, Batch: 6849/7387, Loss=1.1195, lr=0.0000179 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:25:47 Epoch: 0, Batch: 6899/7387, Loss=1.0404, lr=0.0000178 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:20:26:06 Epoch: 0, Batch: 6949/7387, Loss=1.0613, lr=0.0000177 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:26:24 Epoch: 0, Batch: 6999/7387, Loss=1.0826, lr=0.0000175 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:26:42 Epoch: 0, Batch: 7049/7387, Loss=1.1170, lr=0.0000174 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:27:00 Epoch: 0, Batch: 7099/7387, Loss=1.0132, lr=0.0000173 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:27:18 Epoch: 0, Batch: 7149/7387, Loss=0.9205, lr=0.0000172 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:27:36 Epoch: 0, Batch: 7199/7387, Loss=0.9777, lr=0.0000171 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:27:54 Epoch: 0, Batch: 7249/7387, Loss=1.0192, lr=0.0000170 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:28:12 Epoch: 0, Batch: 7299/7387, Loss=1.0828, lr=0.0000169 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:28:30 Epoch: 0, Batch: 7349/7387, Loss=1.0534, lr=0.0000167 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:28:44 Epoch: 0, Time cost=2680.29 s, Thoughput=2.76 samples/s INFO:gluonnlp:20:29:02 Epoch: 1, Batch: 49/7387, Loss=0.7845, lr=0.0000166 Time cost=18.1 Thoughput=57.48 samples/s INFO:gluonnlp:20:29:20 Epoch: 1, Batch: 99/7387, Loss=0.8763, lr=0.0000164 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:29:38 Epoch: 1, Batch: 149/7387, Loss=0.8303, lr=0.0000163 Time cost=18.1 Thoughput=33.11 samples/s INFO:gluonnlp:20:29:56 Epoch: 1, Batch: 199/7387, Loss=0.8161, lr=0.0000162 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:30:14 Epoch: 1, Batch: 249/7387, Loss=0.7513, lr=0.0000161 Time cost=18.2 Thoughput=32.97 samples/s INFO:gluonnlp:20:30:32 Epoch: 1, Batch: 299/7387, Loss=0.8101, lr=0.0000160 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:20:30:50 Epoch: 1, Batch: 349/7387, Loss=0.7909, lr=0.0000159 Time cost=18.1 Thoughput=33.10 samples/s INFO:gluonnlp:20:31:09 Epoch: 1, Batch: 399/7387, Loss=0.8726, lr=0.0000158 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:31:27 Epoch: 1, Batch: 449/7387, Loss=0.7982, lr=0.0000156 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:31:45 Epoch: 1, Batch: 499/7387, Loss=0.8036, lr=0.0000155 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:32:03 Epoch: 1, Batch: 549/7387, Loss=0.9046, lr=0.0000154 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:32:21 Epoch: 1, Batch: 599/7387, Loss=0.7883, lr=0.0000153 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:32:39 Epoch: 1, Batch: 649/7387, Loss=0.7876, lr=0.0000152 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:32:57 Epoch: 1, Batch: 699/7387, Loss=0.7765, lr=0.0000151 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:33:15 Epoch: 1, Batch: 749/7387, Loss=0.8087, lr=0.0000150 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:33:33 Epoch: 1, Batch: 799/7387, Loss=0.8592, lr=0.0000149 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:33:51 Epoch: 1, Batch: 849/7387, Loss=0.8149, lr=0.0000147 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:34:09 Epoch: 1, Batch: 899/7387, Loss=0.8059, lr=0.0000146 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:34:28 Epoch: 1, Batch: 949/7387, Loss=0.7528, lr=0.0000145 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:34:46 Epoch: 1, Batch: 999/7387, Loss=0.9076, lr=0.0000144 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:35:04 Epoch: 1, Batch: 1049/7387, Loss=0.7383, lr=0.0000143 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:35:22 Epoch: 1, Batch: 1099/7387, Loss=0.8025, lr=0.0000142 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:35:40 Epoch: 1, Batch: 1149/7387, Loss=0.8040, lr=0.0000141 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:20:35:58 Epoch: 1, Batch: 1199/7387, Loss=0.8119, lr=0.0000140 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:36:16 Epoch: 1, Batch: 1249/7387, Loss=0.8000, lr=0.0000138 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:36:34 Epoch: 1, Batch: 1299/7387, Loss=0.7890, lr=0.0000137 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:36:52 Epoch: 1, Batch: 1349/7387, Loss=0.8454, lr=0.0000136 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:37:10 Epoch: 1, Batch: 1399/7387, Loss=0.8185, lr=0.0000135 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:37:28 Epoch: 1, Batch: 1449/7387, Loss=0.8862, lr=0.0000134 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:37:46 Epoch: 1, Batch: 1499/7387, Loss=0.9273, lr=0.0000133 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:38:05 Epoch: 1, Batch: 1549/7387, Loss=0.8540, lr=0.0000132 Time cost=18.6 Thoughput=32.28 samples/s INFO:gluonnlp:20:38:23 Epoch: 1, Batch: 1599/7387, Loss=0.7982, lr=0.0000131 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:38:41 Epoch: 1, Batch: 1649/7387, Loss=0.7372, lr=0.0000129 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:38:59 Epoch: 1, Batch: 1699/7387, Loss=0.7792, lr=0.0000128 Time cost=18.1 Thoughput=33.23 samples/s INFO:gluonnlp:20:39:17 Epoch: 1, Batch: 1749/7387, Loss=0.9512, lr=0.0000127 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:39:35 Epoch: 1, Batch: 1799/7387, Loss=0.8004, lr=0.0000126 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:20:39:54 Epoch: 1, Batch: 1849/7387, Loss=0.8573, lr=0.0000125 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:40:12 Epoch: 1, Batch: 1899/7387, Loss=0.8584, lr=0.0000124 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:40:30 Epoch: 1, Batch: 1949/7387, Loss=0.8837, lr=0.0000123 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:40:48 Epoch: 1, Batch: 1999/7387, Loss=0.7299, lr=0.0000122 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:41:06 Epoch: 1, Batch: 2049/7387, Loss=0.7591, lr=0.0000120 Time cost=18.1 Thoughput=33.11 samples/s INFO:gluonnlp:20:41:24 Epoch: 1, Batch: 2099/7387, Loss=0.8226, lr=0.0000119 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:41:42 Epoch: 1, Batch: 2149/7387, Loss=0.7917, lr=0.0000118 Time cost=18.1 Thoughput=33.23 samples/s INFO:gluonnlp:20:42:00 Epoch: 1, Batch: 2199/7387, Loss=0.7923, lr=0.0000117 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:42:18 Epoch: 1, Batch: 2249/7387, Loss=0.7777, lr=0.0000116 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:42:36 Epoch: 1, Batch: 2299/7387, Loss=0.7701, lr=0.0000115 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:42:54 Epoch: 1, Batch: 2349/7387, Loss=0.7772, lr=0.0000114 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:43:13 Epoch: 1, Batch: 2399/7387, Loss=0.7595, lr=0.0000113 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:43:31 Epoch: 1, Batch: 2449/7387, Loss=0.8435, lr=0.0000111 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:43:49 Epoch: 1, Batch: 2499/7387, Loss=0.8569, lr=0.0000110 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:44:07 Epoch: 1, Batch: 2549/7387, Loss=0.8124, lr=0.0000109 Time cost=18.1 Thoughput=33.11 samples/s INFO:gluonnlp:20:44:25 Epoch: 1, Batch: 2599/7387, Loss=0.8626, lr=0.0000108 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:20:44:43 Epoch: 1, Batch: 2649/7387, Loss=0.8736, lr=0.0000107 Time cost=18.0 Thoughput=33.24 samples/s INFO:gluonnlp:20:45:01 Epoch: 1, Batch: 2699/7387, Loss=0.8202, lr=0.0000106 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:45:19 Epoch: 1, Batch: 2749/7387, Loss=0.8250, lr=0.0000105 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:45:37 Epoch: 1, Batch: 2799/7387, Loss=0.7868, lr=0.0000103 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:45:55 Epoch: 1, Batch: 2849/7387, Loss=0.7391, lr=0.0000102 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:46:13 Epoch: 1, Batch: 2899/7387, Loss=0.7755, lr=0.0000101 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:46:31 Epoch: 1, Batch: 2949/7387, Loss=0.8649, lr=0.0000100 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:46:50 Epoch: 1, Batch: 2999/7387, Loss=0.7642, lr=0.0000099 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:47:08 Epoch: 1, Batch: 3049/7387, Loss=0.7990, lr=0.0000098 Time cost=18.0 Thoughput=33.24 samples/s INFO:gluonnlp:20:47:26 Epoch: 1, Batch: 3099/7387, Loss=0.8566, lr=0.0000097 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:47:44 Epoch: 1, Batch: 3149/7387, Loss=0.7719, lr=0.0000096 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:48:02 Epoch: 1, Batch: 3199/7387, Loss=0.8018, lr=0.0000094 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:48:20 Epoch: 1, Batch: 3249/7387, Loss=0.7621, lr=0.0000093 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:48:38 Epoch: 1, Batch: 3299/7387, Loss=0.7815, lr=0.0000092 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:48:56 Epoch: 1, Batch: 3349/7387, Loss=0.8683, lr=0.0000091 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:49:14 Epoch: 1, Batch: 3399/7387, Loss=0.7963, lr=0.0000090 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:49:32 Epoch: 1, Batch: 3449/7387, Loss=0.8872, lr=0.0000089 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:49:50 Epoch: 1, Batch: 3499/7387, Loss=0.7945, lr=0.0000088 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:50:08 Epoch: 1, Batch: 3549/7387, Loss=0.8785, lr=0.0000087 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:50:27 Epoch: 1, Batch: 3599/7387, Loss=0.7975, lr=0.0000085 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:50:45 Epoch: 1, Batch: 3649/7387, Loss=0.8597, lr=0.0000084 Time cost=18.1 Thoughput=33.09 samples/s INFO:gluonnlp:20:51:03 Epoch: 1, Batch: 3699/7387, Loss=0.8087, lr=0.0000083 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:51:21 Epoch: 1, Batch: 3749/7387, Loss=0.8346, lr=0.0000082 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:51:39 Epoch: 1, Batch: 3799/7387, Loss=0.8489, lr=0.0000081 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:51:57 Epoch: 1, Batch: 3849/7387, Loss=0.7944, lr=0.0000080 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:52:15 Epoch: 1, Batch: 3899/7387, Loss=0.7489, lr=0.0000079 Time cost=18.1 Thoughput=33.23 samples/s INFO:gluonnlp:20:52:33 Epoch: 1, Batch: 3949/7387, Loss=0.8186, lr=0.0000078 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:20:52:51 Epoch: 1, Batch: 3999/7387, Loss=0.7746, lr=0.0000076 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:53:09 Epoch: 1, Batch: 4049/7387, Loss=0.7214, lr=0.0000075 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:53:27 Epoch: 1, Batch: 4099/7387, Loss=0.7938, lr=0.0000074 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:20:53:46 Epoch: 1, Batch: 4149/7387, Loss=0.7562, lr=0.0000073 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:54:04 Epoch: 1, Batch: 4199/7387, Loss=0.8324, lr=0.0000072 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:54:22 Epoch: 1, Batch: 4249/7387, Loss=0.7561, lr=0.0000071 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:20:54:40 Epoch: 1, Batch: 4299/7387, Loss=0.8103, lr=0.0000070 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:54:58 Epoch: 1, Batch: 4349/7387, Loss=0.8730, lr=0.0000069 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:55:16 Epoch: 1, Batch: 4399/7387, Loss=0.7966, lr=0.0000067 Time cost=18.2 Thoughput=32.88 samples/s INFO:gluonnlp:20:55:34 Epoch: 1, Batch: 4449/7387, Loss=0.7266, lr=0.0000066 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:55:52 Epoch: 1, Batch: 4499/7387, Loss=0.8136, lr=0.0000065 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:20:56:10 Epoch: 1, Batch: 4549/7387, Loss=0.7371, lr=0.0000064 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:56:29 Epoch: 1, Batch: 4599/7387, Loss=0.7316, lr=0.0000063 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:56:47 Epoch: 1, Batch: 4649/7387, Loss=0.7802, lr=0.0000062 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:57:05 Epoch: 1, Batch: 4699/7387, Loss=0.7836, lr=0.0000061 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:57:23 Epoch: 1, Batch: 4749/7387, Loss=0.7710, lr=0.0000059 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:20:57:41 Epoch: 1, Batch: 4799/7387, Loss=0.7955, lr=0.0000058 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:57:59 Epoch: 1, Batch: 4849/7387, Loss=0.8423, lr=0.0000057 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:20:58:17 Epoch: 1, Batch: 4899/7387, Loss=0.7811, lr=0.0000056 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:20:58:35 Epoch: 1, Batch: 4949/7387, Loss=0.7916, lr=0.0000055 Time cost=18.2 Thoughput=32.99 samples/s INFO:gluonnlp:20:58:54 Epoch: 1, Batch: 4999/7387, Loss=0.7208, lr=0.0000054 Time cost=18.3 Thoughput=32.81 samples/s INFO:gluonnlp:20:59:12 Epoch: 1, Batch: 5049/7387, Loss=0.8960, lr=0.0000053 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:20:59:30 Epoch: 1, Batch: 5099/7387, Loss=0.7445, lr=0.0000052 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:20:59:48 Epoch: 1, Batch: 5149/7387, Loss=0.7193, lr=0.0000050 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:00:06 Epoch: 1, Batch: 5199/7387, Loss=0.6751, lr=0.0000049 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:21:00:24 Epoch: 1, Batch: 5249/7387, Loss=0.7772, lr=0.0000048 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:21:00:42 Epoch: 1, Batch: 5299/7387, Loss=0.7332, lr=0.0000047 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:21:01:00 Epoch: 1, Batch: 5349/7387, Loss=0.6974, lr=0.0000046 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:01:18 Epoch: 1, Batch: 5399/7387, Loss=0.7048, lr=0.0000045 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:21:01:36 Epoch: 1, Batch: 5449/7387, Loss=0.8802, lr=0.0000044 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:01:54 Epoch: 1, Batch: 5499/7387, Loss=0.7990, lr=0.0000043 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:21:02:12 Epoch: 1, Batch: 5549/7387, Loss=0.7014, lr=0.0000041 Time cost=18.0 Thoughput=33.25 samples/s INFO:gluonnlp:21:02:30 Epoch: 1, Batch: 5599/7387, Loss=0.7833, lr=0.0000040 Time cost=18.1 Thoughput=33.13 samples/s INFO:gluonnlp:21:02:49 Epoch: 1, Batch: 5649/7387, Loss=0.6833, lr=0.0000039 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:21:03:07 Epoch: 1, Batch: 5699/7387, Loss=0.8257, lr=0.0000038 Time cost=18.1 Thoughput=33.21 samples/s INFO:gluonnlp:21:03:25 Epoch: 1, Batch: 5749/7387, Loss=0.7447, lr=0.0000037 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:21:03:43 Epoch: 1, Batch: 5799/7387, Loss=0.7581, lr=0.0000036 Time cost=18.1 Thoughput=33.22 samples/s INFO:gluonnlp:21:04:01 Epoch: 1, Batch: 5849/7387, Loss=0.7216, lr=0.0000035 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:04:19 Epoch: 1, Batch: 5899/7387, Loss=0.7282, lr=0.0000034 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:21:04:37 Epoch: 1, Batch: 5949/7387, Loss=0.7415, lr=0.0000032 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:21:04:55 Epoch: 1, Batch: 5999/7387, Loss=0.7639, lr=0.0000031 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:21:05:13 Epoch: 1, Batch: 6049/7387, Loss=0.7984, lr=0.0000030 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:21:05:31 Epoch: 1, Batch: 6099/7387, Loss=0.8039, lr=0.0000029 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:21:05:49 Epoch: 1, Batch: 6149/7387, Loss=0.7899, lr=0.0000028 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:21:06:07 Epoch: 1, Batch: 6199/7387, Loss=0.7969, lr=0.0000027 Time cost=18.1 Thoughput=33.16 samples/s INFO:gluonnlp:21:06:26 Epoch: 1, Batch: 6249/7387, Loss=0.7962, lr=0.0000026 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:21:06:44 Epoch: 1, Batch: 6299/7387, Loss=0.7577, lr=0.0000025 Time cost=18.0 Thoughput=33.27 samples/s INFO:gluonnlp:21:07:02 Epoch: 1, Batch: 6349/7387, Loss=0.7473, lr=0.0000023 Time cost=18.0 Thoughput=33.25 samples/s INFO:gluonnlp:21:07:20 Epoch: 1, Batch: 6399/7387, Loss=0.7642, lr=0.0000022 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:21:07:38 Epoch: 1, Batch: 6449/7387, Loss=0.7664, lr=0.0000021 Time cost=18.1 Thoughput=33.10 samples/s INFO:gluonnlp:21:07:56 Epoch: 1, Batch: 6499/7387, Loss=0.7790, lr=0.0000020 Time cost=18.1 Thoughput=33.17 samples/s INFO:gluonnlp:21:08:14 Epoch: 1, Batch: 6549/7387, Loss=0.7753, lr=0.0000019 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:08:32 Epoch: 1, Batch: 6599/7387, Loss=0.7123, lr=0.0000018 Time cost=18.1 Thoughput=33.14 samples/s INFO:gluonnlp:21:08:50 Epoch: 1, Batch: 6649/7387, Loss=0.8291, lr=0.0000017 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:09:08 Epoch: 1, Batch: 6699/7387, Loss=0.8572, lr=0.0000015 Time cost=18.1 Thoughput=33.12 samples/s INFO:gluonnlp:21:09:26 Epoch: 1, Batch: 6749/7387, Loss=0.8145, lr=0.0000014 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:09:45 Epoch: 1, Batch: 6799/7387, Loss=0.7391, lr=0.0000013 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:21:10:03 Epoch: 1, Batch: 6849/7387, Loss=0.8285, lr=0.0000012 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:21:10:21 Epoch: 1, Batch: 6899/7387, Loss=0.7226, lr=0.0000011 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:21:10:39 Epoch: 1, Batch: 6949/7387, Loss=0.7662, lr=0.0000010 Time cost=18.1 Thoughput=33.20 samples/s INFO:gluonnlp:21:10:57 Epoch: 1, Batch: 6999/7387, Loss=0.7500, lr=0.0000009 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:21:11:15 Epoch: 1, Batch: 7049/7387, Loss=0.8089, lr=0.0000008 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:11:33 Epoch: 1, Batch: 7099/7387, Loss=0.7577, lr=0.0000006 Time cost=18.1 Thoughput=33.18 samples/s INFO:gluonnlp:21:11:51 Epoch: 1, Batch: 7149/7387, Loss=0.7716, lr=0.0000005 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:12:09 Epoch: 1, Batch: 7199/7387, Loss=0.8235, lr=0.0000004 Time cost=18.1 Thoughput=33.15 samples/s INFO:gluonnlp:21:12:27 Epoch: 1, Batch: 7249/7387, Loss=0.7094, lr=0.0000003 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:12:45 Epoch: 1, Batch: 7299/7387, Loss=0.8129, lr=0.0000002 Time cost=18.1 Thoughput=33.19 samples/s INFO:gluonnlp:21:13:03 Epoch: 1, Batch: 7349/7387, Loss=0.7410, lr=0.0000001 Time cost=18.1 Thoughput=33.23 samples/s INFO:gluonnlp:21:13:17 Epoch: 1, Time cost=5353.44 s, Thoughput=1.38 samples/s INFO:gluonnlp:21:13:18 Loader dev data... INFO:gluonnlp:21:13:18 Number of records in Train data:10570 INFO:gluonnlp:21:13:26 The number of examples after preprocessing:10833 INFO:gluonnlp:21:13:26 Start predict INFO:gluonnlp:21:15:18 Inference time cost=111.43 s, Thoughput=4.06 samples/s INFO:gluonnlp:21:15:18 Get prediction results... INFO:gluonnlp:21:17:12 {'exact_match': 81.10690633869442, 'f1': 88.54471298049292}