-------------------------------------------------------------------------- WARNING: Linux kernel CMA support was requested via the btl_vader_single_copy_mechanism MCA variable, but CMA support is not available due to restrictive ptrace settings. The vader shared memory BTL will fall back on another single-copy mechanism if one is available. This may result in lower performance. Local host: ip-172-31-29-212 -------------------------------------------------------------------------- [1,1]:Using AutoAugment [1,3]:Using AutoAugment [1,7]:Using AutoAugment [1,6]:Using AutoAugment [1,5]:Using AutoAugment [1,0]:INFO:root:Namespace(auto_aug=True, batch_size=128, crop_ratio=0.875, data_nthreads=12, dropblock_prob=0, dtype='float32', eval_frequency=5, hard_weight=0.5, input_size=224, label_smoothing=True, last_gamma=True, log_interval=100, lr=0.05, mixup=True, mixup_alpha=0.2, mixup_off_epoch=0, model='resnest14', momentum=0.9, no_cuda=False, no_wd=True, num_epochs=270, rec_train='/home/ubuntu/data/ILSVRC2012/train.rec', rec_val='/home/ubuntu/data/ILSVRC2012/val.rec', resume_epoch=0, resume_params='', resume_states='', save_dir='params_resnest14', save_frequency=20, teacher=None, temperature=20, use_avd=False, use_pretrained=False, use_rec=True, use_sk=False, warmup_epochs=5, warmup_lr=0.0, wd=0.0001) [1,2]:Using AutoAugment [1,0]:Using AutoAugment [1,4]:Using AutoAugment [1,0]:INFO:root:ResNeSt( [1,0]: (conv1): HybridSequential( [1,0]: (0): Conv2D(3 -> 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) [1,0]: (1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=32) [1,0]: (2): Activation(relu) [1,0]: (3): Conv2D(32 -> 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) [1,0]: (4): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=32) [1,0]: (5): Activation(relu) [1,0]: (6): Conv2D(32 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) [1,0]: ) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=64) [1,0]: (relu): Activation(relu) [1,0]: (maxpool): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW) [1,0]: (layer1): HybridSequential( [1,0]: (0): Bottleneck( [1,0]: (conv1): Conv2D(64 -> 64, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=64) [1,0]: (relu1): Activation(relu) [1,0]: (conv2): SplitAttentionConv( [1,0]: (conv): Conv2D(32 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) [1,0]: (bn): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=128) [1,0]: (relu): Activation(relu) [1,0]: (fc1): Conv2D(64 -> 32, kernel_size=(1, 1), stride=(1, 1)) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=32) [1,0]: (relu1): Activation(relu) [1,0]: (fc2): Conv2D(32 -> 128, kernel_size=(1, 1), stride=(1, 1)) [1,0]: ) [1,0]: (conv3): Conv2D(64 -> 256, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (bn3): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=256) [1,0]: (relu3): Activation(relu) [1,0]: (downsample): HybridSequential( [1,0]: (0): AvgPool2D(size=(1, 1), stride=(1, 1), padding=(0, 0), ceil_mode=True, global_pool=False, pool_type=avg, layout=NCHW) [1,0]: (1): Conv2D(64 -> 256, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (2): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=256) [1,0]: ) [1,0]: ) [1,0]: ) [1,0]: (layer2): HybridSequential( [1,0]: (0): Bottleneck( [1,0]: (conv1): Conv2D(256 -> 128, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=128) [1,0]: (relu1): Activation(relu) [1,0]: (conv2): SplitAttentionConv( [1,0]: (conv): Conv2D(64 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) [1,0]: (bn): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=256) [1,0]: (relu): Activation(relu) [1,0]: (fc1): Conv2D(128 -> 64, kernel_size=(1, 1), stride=(1, 1)) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=64) [1,0]: (relu1): Activation(relu) [1,0]: (fc2): Conv2D(64 -> 256, kernel_size=(1, 1), stride=(1, 1)) [1,0]: ) [1,0]: (conv3): Conv2D(128 -> 512, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (bn3): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=512) [1,0]: (avd_layer): AvgPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False, global_pool=False, pool_type=avg, layout=NCHW) [1,0]: (relu3): Activation(relu) [1,0]: (downsample): HybridSequential( [1,0]: (0): AvgPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=True, global_pool=False, pool_type=avg, layout=NCHW) [1,0]: (1): Conv2D(256 -> 512, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (2): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=512) [1,0]: ) [1,0]: ) [1,0]: ) [1,0]: (layer3): HybridSequential( [1,0]: (0): Bottleneck( [1,0]: (conv1): Conv2D(512 -> 256, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, mo[1,0]:mentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=256) [1,0]: (relu1): Activation(relu) [1,0]: (conv2): SplitAttentionConv( [1,0]: (conv): Conv2D(128 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) [1,0]: (bn): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=512) [1,0]: (relu): Activation(relu) [1,0]: (fc1): Conv2D(256 -> 128, kernel_size=(1, 1), stride=(1, 1)) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=128) [1,0]: (relu1): Activation(relu) [1,0]: (fc2): Conv2D(128 -> 512, kernel_size=(1, 1), stride=(1, 1)) [1,0]: ) [1,0]: (conv3): Conv2D(256 -> 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (bn3): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=1024) [1,0]: (avd_layer): AvgPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False, global_pool=False, pool_type=avg, layout=NCHW) [1,0]: (relu3): Activation(relu) [1,0]: (downsample): HybridSequential( [1,0]: (0): AvgPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=True, global_pool=False, pool_type=avg, layout=NCHW) [1,0]: (1): Conv2D(512 -> 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (2): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=1024) [1,0]: ) [1,0]: ) [1,0]: ) [1,0]: (layer4): HybridSequential( [1,0]: (0): Bottleneck( [1,0]: (conv1): Conv2D(1024 -> 512, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=512) [1,0]: (relu1): Activation(relu) [1,0]: (conv2): SplitAttentionConv( [1,0]: (conv): Conv2D(256 -> 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) [1,0]: (bn): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=1024) [1,0]: (relu): Activation(relu) [1,0]: (fc1): Conv2D(512 -> 256, kernel_size=(1, 1), stride=(1, 1)) [1,0]: (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=256) [1,0]: (relu1): Activation(relu) [1,0]: (fc2): Conv2D(256 -> 1024, kernel_size=(1, 1), stride=(1, 1)) [1,0]: ) [1,0]: (conv3): Conv2D(512 -> 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (bn3): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=2048) [1,0]: (avd_layer): AvgPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False, global_pool=False, pool_type=avg, layout=NCHW) [1,0]: (relu3): Activation(relu) [1,0]: (downsample): HybridSequential( [1,0]: (0): AvgPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=True, global_pool=False, pool_type=avg, layout=NCHW) [1,0]: (1): Conv2D(1024 -> 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) [1,0]: (2): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=2048) [1,0]: ) [1,0]: ) [1,0]: ) [1,0]: (avgpool): GlobalAvgPool2D(size=(1, 1), stride=(1, 1), padding=(0, 0), ceil_mode=True, global_pool=True, pool_type=avg, layout=NCHW) [1,0]: (flat): Flatten [1,0]: (fc): Dense(2048 -> 1000, linear) [1,0]:) [ip-172-31-29-212:52930] 7 more processes have sent help message help-btl-vader.txt / cma-permission-denied [ip-172-31-29-212:52930] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages [1,7]:[02:55:16] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,1]:[02:55:17] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,2]:[02:55:17] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,6]:[02:55:18] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,4]:[02:55:18] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,3]:[02:55:18] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,0]:[02:55:19] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,5]:[02:55:20] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,0]:INFO:root:Epoch[0] Batch[100] Loss[6.968] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[100] rmse=0.026110 lr=0.006390 [1,0]:INFO:root:Epoch[0] Batch[200] Loss[6.902] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[200] rmse=0.025904 lr=0.012780 [1,0]:INFO:root:Epoch[0] Batch[300] Loss[6.920] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[300] rmse=0.025836 lr=0.019169 [1,0]:INFO:root:Epoch[0] Batch[400] Loss[6.819] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[400] rmse=0.025832 lr=0.025559 [1,0]:INFO:root:Epoch[0] Batch[500] Loss[6.645] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[500] rmse=0.025945 lr=0.031949 [1,0]:INFO:root:Epoch[0] Batch[600] Loss[6.698] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[600] rmse=0.025930 lr=0.038339 [1,0]:INFO:root:Epoch[0] Batch[700] Loss[6.723] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[700] rmse=0.025995 lr=0.044728 [1,0]:INFO:root:Epoch[0] Batch[800] Loss[6.681] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[800] rmse=0.026082 lr=0.051118 [1,0]:INFO:root:Epoch[0] Batch[900] Loss[6.368] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[900] rmse=0.026126 lr=0.057508 [1,0]:INFO:root:Epoch[0] Batch[1000] Loss[6.377] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[1000] rmse=0.026119 lr=0.063898 [1,0]:INFO:root:Epoch[0] Batch[1100] Loss[6.606] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[1100] rmse=0.026145 lr=0.070288 [1,0]:INFO:root:Epoch[0] Batch[1200] Loss[6.162] [1,0]:INFO:root:Epoch[0] Rank[0] Batch[1200] rmse=0.026096 lr=0.076677 [1,0]:INFO:root:Epoch[0] Rank[0] Batch[1251] Time cost=411.25 Train-metric=0.026114 [1,0]:INFO:root:Epoch[0] Speed: 3114.92 samples/sec [1,0]:INFO:root:Epoch[1] Batch[100] Loss[6.538] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[100] rmse=0.026010 lr=0.086326 [1,0]:INFO:root:Epoch[1] Batch[200] Loss[6.005] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[200] rmse=0.026021 lr=0.092716 [1,0]:INFO:root:Epoch[1] Batch[300] Loss[6.032] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[300] rmse=0.026005 lr=0.099105 [1,0]:INFO:root:Epoch[1] Batch[400] Loss[6.288] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[400] rmse=0.025951 lr=0.105495 [1,0]:INFO:root:Epoch[1] Batch[500] Loss[5.896] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[500] rmse=0.025931 lr=0.111885 [1,0]:INFO:root:Epoch[1] Batch[600] Loss[5.924] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[600] rmse=0.026036 lr=0.118275 [1,0]:INFO:root:Epoch[1] Batch[700] Loss[5.784] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[700] rmse=0.026068 lr=0.124665 [1,0]:INFO:root:Epoch[1] Batch[800] Loss[6.021] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[800] rmse=0.026045 lr=0.131054 [1,0]:INFO:root:Epoch[1] Batch[900] Loss[6.072] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[900] rmse=0.026026 lr=0.137444 [1,0]:INFO:root:Epoch[1] Batch[1000] Loss[5.878] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[1000] rmse=0.026021 lr=0.143834 [1,0]:INFO:root:Epoch[1] Batch[1100] Loss[6.373] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[1100] rmse=0.025989 lr=0.150224 [1,0]:INFO:root:Epoch[1] Batch[1200] Loss[6.210] [1,0]:INFO:root:Epoch[1] Rank[0] Batch[1200] rmse=0.025946 lr=0.156613 [1,0]:INFO:root:Epoch[1] Rank[0] Batch[1251] Time cost=403.06 Train-metric=0.025975 [1,0]:INFO:root:Epoch[1] Speed: 3178.23 samples/sec [1,0]:INFO:root:Epoch[2] Batch[100] Loss[6.393] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[100] rmse=0.025684 lr=0.166262 [1,0]:INFO:root:Epoch[2] Batch[200] Loss[5.491] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[200] rmse=0.025817 lr=0.172652 [1,0]:INFO:root:Epoch[2] Batch[300] Loss[5.498] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[300] rmse=0.025850 lr=0.179042 [1,0]:INFO:root:Epoch[2] Batch[400] Loss[5.816] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[400] rmse=0.025743 lr=0.185431 [1,0]:INFO:root:Epoch[2] Batch[500] Loss[6.468] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[500] rmse=0.025642 lr=0.191821 [1,0]:INFO:root:Epoch[2] Batch[600] Loss[5.075] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[600] rmse=0.025579 lr=0.198211 [1,0]:INFO:root:Epoch[2] Batch[700] Loss[5.993] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[700] rmse=0.025634 lr=0.204601 [1,0]:INFO:root:Epoch[2] Batch[800] Loss[5.910] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[800] rmse=0.025607 lr=0.210990 [1,0]:INFO:root:Epoch[2] Batch[900] Loss[4.931] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[900] rmse=0.025564 lr=0.217380 [1,0]:INFO:root:Epoch[2] Batch[1000] Loss[5.203] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[1000] rmse=0.025557 lr=0.223770 [1,0]:INFO:root:Epoch[2] Batch[1100] Loss[6.421] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[1100] rmse=0.025573 lr=0.230160 [1,0]:INFO:root:Epoch[2] Batch[1200] Loss[5.375] [1,0]:INFO:root:Epoch[2] Rank[0] Batch[1200] rmse=0.025520 lr=0.236550 [1,0]:INFO:root:Epoch[2] Rank[0] Batch[1251] Time cost=399.89 Train-metric=0.025506 [1,0]:INFO:root:Epoch[2] Speed: 3203.44 samples/sec [1,0]:INFO:root:Epoch[3] Batch[100] Loss[4.938] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[100] rmse=0.025412 lr=0.246198 [1,0]:INFO:root:Epoch[3] Batch[200] Loss[4.823] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[200] rmse=0.025167 lr=0.252588 [1,0]:INFO:root:Epoch[3] Batch[300] Loss[4.837] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[300] rmse=0.025393 lr=0.258978 [1,0]:INFO:root:Epoch[3] Batch[400] Loss[5.892] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[400] rmse=0.025473 lr=0.265367 [1,0]:INFO:root:Epoch[3] Batch[500] Loss[6.228] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[500] rmse=0.025425 lr=0.271757 [1,0]:INFO:root:Epoch[3] Batch[600] Loss[4.889] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[600] rmse=0.025333 lr=0.278147 [1,0]:INFO:root:Epoch[3] Batch[700] Loss[5.523] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[700] rmse=0.025333 lr=0.284537 [1,0]:INFO:root:Epoch[3] Batch[800] Loss[4.585] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[800] rmse=0.025308 lr=0.290927 [1,0]:INFO:root:Epoch[3] Batch[900] Loss[4.672] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[900] rmse=0.025262 lr=0.297316 [1,0]:INFO:root:Epoch[3] Batch[1000] Loss[4.370] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[1000] rmse=0.025265 lr=0.303706 [1,0]:INFO:root:Epoch[3] Batch[1100] Loss[4.562] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[1100] rmse=0.025291 lr=0.310096 [1,0]:INFO:root:Epoch[3] Batch[1200] Loss[4.394] [1,0]:INFO:root:Epoch[3] Rank[0] Batch[1200] rmse=0.025231 lr=0.316486 [1,0]:INFO:root:Epoch[3] Rank[0] Batch[1251] Time cost=402.00 Train-metric=0.025214 [1,0]:INFO:root:Epoch[3] Speed: 3186.64 samples/sec [1,0]:INFO:root:Epoch[4] Batch[100] Loss[5.590] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[100] rmse=0.024661 lr=0.326134 [1,0]:INFO:root:Epoch[4] Batch[200] Loss[4.566] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[200] rmse=0.024546 lr=0.332524 [1,0]:INFO:root:Epoch[4] Batch[300] Loss[4.503] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[300] rmse=0.024636 lr=0.338914 [1,0]:INFO:root:Epoch[4] Batch[400] Loss[4.048] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[400] rmse=0.024755 lr=0.345304 [1,0]:INFO:root:Epoch[4] Batch[500] Loss[4.986] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[500] rmse=0.024727 lr=0.351693 [1,0]:INFO:root:Epoch[4] Batch[600] Loss[4.287] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[600] rmse=0.024665 lr=0.358083 [1,0]:INFO:root:Epoch[4] Batch[700] Loss[4.859] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[700] rmse=0.024630 lr=0.364473 [1,0]:INFO:root:Epoch[4] Batch[800] Loss[4.753] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[800] rmse=0.024636 lr=0.370863 [1,0]:INFO:root:Epoch[4] Batch[900] Loss[3.926] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[900] rmse=0.024622 lr=0.377252 [1,0]:INFO:root:Epoch[4] Batch[1000] Loss[4.337] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[1000] rmse=0.024565 lr=0.383642 [1,0]:INFO:root:Epoch[4] Batch[1100] Loss[5.897] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[1100] rmse=0.024542 lr=0.390032 [1,0]:INFO:root:Epoch[4] Batch[1200] Loss[4.200] [1,0]:INFO:root:Epoch[4] Rank[0] Batch[1200] rmse=0.024544 lr=0.396422 [1,0]:INFO:root:Epoch[4] Rank[0] Batch[1251] Time cost=400.81 Train-metric=0.024516 [1,0]:INFO:root:Epoch[4] Speed: 3196.10 samples/sec [1,1]:[03:28:57] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,0]:[03:28:57] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,6]:[03:28:57] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,4]:[03:28:57] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,2]:[03:28:57] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,7]:[03:28:58] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,3]:[03:28:58] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,5]:[03:28:58] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable) [1,0]:INFO:root:Epoch[4] Rank[0] Validation-accuracy=0.329120 Validation-top_k_accuracy_5=0.591300 [1,0]:INFO:root:Epoch[5] Batch[100] Loss[6.020] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[100] rmse=0.023842 lr=0.400000 [1,0]:INFO:root:Epoch[5] Batch[200] Loss[4.476] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[200] rmse=0.023963 lr=0.400000 [1,0]:INFO:root:Epoch[5] Batch[300] Loss[5.560] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[300] rmse=0.023967 lr=0.399999 [1,0]:INFO:root:Epoch[5] Batch[400] Loss[5.731] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[400] rmse=0.024017 lr=0.399999 [1,0]:INFO:root:Epoch[5] Batch[500] Loss[4.411] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[500] rmse=0.024053 lr=0.399998 [1,0]:INFO:root:Epoch[5] Batch[600] Loss[3.651] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[600] rmse=0.024110 lr=0.399997 [1,0]:INFO:root:Epoch[5] Batch[700] Loss[4.031] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[700] rmse=0.024078 lr=0.399996 [1,0]:INFO:root:Epoch[5] Batch[800] Loss[3.611] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[800] rmse=0.024064 lr=0.399994 [1,0]:INFO:root:Epoch[5] Batch[900] Loss[3.908] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[900] rmse=0.024084 lr=0.399993 [1,0]:INFO:root:Epoch[5] Batch[1000] Loss[4.317] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[1000] rmse=0.024048 lr=0.399991 [1,0]:INFO:root:Epoch[5] Batch[1100] Loss[4.801] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[1100] rmse=0.024017 lr=0.399989 [1,0]:INFO:root:Epoch[5] Batch[1200] Loss[4.117] [1,0]:INFO:root:Epoch[5] Rank[0] Batch[1200] rmse=0.024014 lr=0.399987 [1,0]:INFO:root:Epoch[5] Rank[0] Batch[1251] Time cost=396.90 Train-metric=0.024009 [1,0]:INFO:root:Epoch[5] Speed: 3227.59 samples/sec [1,0]:INFO:root:Epoch[6] Batch[100] Loss[5.530] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[100] rmse=0.023413 lr=0.399984 [1,0]:INFO:root:Epoch[6] Batch[200] Loss[3.681] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[200] rmse=0.023487 lr=0.399981 [1,0]:INFO:root:Epoch[6] Batch[300] Loss[5.505] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[300] rmse=0.023505 lr=0.399979 [1,0]:INFO:root:Epoch[6] Batch[400] Loss[5.287] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[400] rmse=0.023518 lr=0.399976 [1,0]:INFO:root:Epoch[6] Batch[500] Loss[5.637] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[500] rmse=0.023517 lr=0.399973 [1,0]:INFO:root:Epoch[6] Batch[600] Loss[3.670] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[600] rmse=0.023561 lr=0.399969 [1,0]:INFO:root:Epoch[6] Batch[700] Loss[4.074] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[700] rmse=0.023520 lr=0.399966 [1,0]:INFO:root:Epoch[6] Batch[800] Loss[3.888] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[800] rmse=0.023505 lr=0.399962 [1,0]:INFO:root:Epoch[6] Batch[900] Loss[5.590] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[900] rmse=0.023511 lr=0.399959 [1,0]:INFO:root:Epoch[6] Batch[1000] Loss[4.002] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[1000] rmse=0.023506 lr=0.399955 [1,0]:INFO:root:Epoch[6] Batch[1100] Loss[3.459] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[1100] rmse=0.023486 lr=0.399951 [1,0]:INFO:root:Epoch[6] Batch[1200] Loss[3.580] [1,0]:INFO:root:Epoch[6] Rank[0] Batch[1200] rmse=0.023465 lr=0.399946 [1,0]:INFO:root:Epoch[6] Rank[0] Batch[1251] Time cost=401.24 Train-metric=0.023467 [1,0]:INFO:root:Epoch[6] Speed: 3192.67 samples/sec [1,0]:INFO:root:Epoch[7] Batch[100] Loss[5.722] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[100] rmse=0.023108 lr=0.399940 [1,0]:INFO:root:Epoch[7] Batch[200] Loss[3.651] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[200] rmse=0.023077 lr=0.399935 [1,0]:INFO:root:Epoch[7] Batch[300] Loss[5.294] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[300] rmse=0.023188 lr=0.399930 [1,0]:INFO:root:Epoch[7] Batch[400] Loss[3.546] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[400] rmse=0.023169 lr=0.399925 [1,0]:INFO:root:Epoch[7] Batch[500] Loss[3.698] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[500] rmse=0.023181 lr=0.399919 [1,0]:INFO:root:Epoch[7] Batch[600] Loss[4.370] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[600] rmse=0.023200 lr=0.399914 [1,0]:INFO:root:Epoch[7] Batch[700] Loss[3.864] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[700] rmse=0.023206 lr=0.399908 [1,0]:INFO:root:Epoch[7] Batch[800] Loss[5.142] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[800] rmse=0.023172 lr=0.399903 [1,0]:INFO:root:Epoch[7] Batch[900] Loss[4.452] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[900] rmse=0.023170 lr=0.399897 [1,0]:INFO:root:Epoch[7] Batch[1000] Loss[3.645] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[1000] rmse=0.023207 lr=0.399890 [1,0]:INFO:root:Epoch[7] Batch[1100] Loss[3.806] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[1100] rmse=0.023203 lr=0.399884 [1,0]:INFO:root:Epoch[7] Batch[1200] Loss[3.898] [1,0]:INFO:root:Epoch[7] Rank[0] Batch[1200] rmse=0.023223 lr=0.399877 [1,0]:INFO:root:Epoch[7] Rank[0] Batch[1251] Time cost=402.08 Train-metric=0.023212 [1,0]:INFO:root:Epoch[7] Speed: 3186.02 samples/sec [1,0]:INFO:root:Epoch[8] Batch[100] Loss[3.634] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[100] rmse=0.022877 lr=0.399867 [1,0]:INFO:root:Epoch[8] Batch[200] Loss[3.400] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[200] rmse=0.022901 lr=0.399860 [1,0]:INFO:root:Epoch[8] Batch[300] Loss[4.747] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[300] rmse=0.022917 lr=0.399853 [1,0]:INFO:root:Epoch[8] Batch[400] Loss[5.370] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[400] rmse=0.022890 lr=0.399846 [1,0]:INFO:root:Epoch[8] Batch[500] Loss[3.378] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[500] rmse=0.022935 lr=0.399838 [1,0]:INFO:root:Epoch[8] Batch[600] Loss[5.494] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[600] rmse=0.022949 lr=0.399831 [1,0]:INFO:root:Epoch[8] Batch[700] Loss[4.168] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[700] rmse=0.022956 lr=0.399823 [1,0]:INFO:root:Epoch[8] Batch[800] Loss[4.942] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[800] rmse=0.022968 lr=0.399815 [1,0]:INFO:root:Epoch[8] Batch[900] Loss[3.396] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[900] rmse=0.022944 lr=0.399806 [1,0]:INFO:root:Epoch[8] Batch[1000] Loss[3.228] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[1000] rmse=0.022935 lr=0.399798 [1,0]:INFO:root:Epoch[8] Batch[1100] Loss[3.025] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[1100] rmse=0.022899 lr=0.399789 [1,0]:INFO:root:Epoch[8] Batch[1200] Loss[3.342] [1,0]:INFO:root:Epoch[8] Rank[0] Batch[1200] rmse=0.022874 lr=0.399781 [1,0]:INFO:root:Epoch[8] Rank[0] Batch[1251] Time cost=399.20 Train-metric=0.022872 [1,0]:INFO:root:Epoch[8] Speed: 3208.99 samples/sec [1,0]:INFO:root:Epoch[9] Batch[100] Loss[3.327] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[100] rmse=0.022718 lr=0.399767 [1,0]:INFO:root:Epoch[9] Batch[200] Loss[4.662] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[200] rmse=0.022743 lr=0.399758 [1,0]:INFO:root:Epoch[9] Batch[300] Loss[5.136] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[300] rmse=0.022730 lr=0.399748 [1,0]:INFO:root:Epoch[9] Batch[400] Loss[3.182] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[400] rmse=0.022725 lr=0.399739 [1,0]:INFO:root:Epoch[9] Batch[500] Loss[5.391] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[500] rmse=0.022673 lr=0.399729 [1,0]:INFO:root:Epoch[9] Batch[600] Loss[4.219] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[600] rmse=0.022653 lr=0.399719 [1,0]:INFO:root:Epoch[9] Batch[700] Loss[5.428] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[700] rmse=0.022639 lr=0.399709 [1,0]:INFO:root:Epoch[9] Batch[800] Loss[4.167] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[800] rmse=0.022641 lr=0.399699 [1,0]:INFO:root:Epoch[9] Batch[900] Loss[3.790] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[900] rmse=0.022647 lr=0.399688 [1,0]:INFO:root:Epoch[9] Batch[1000] Loss[4.589] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[1000] rmse=0.022653 lr=0.399677 [1,0]:INFO:root:Epoch[9] Batch[1100] Loss[5.346] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[1100] rmse=0.022666 lr=0.399667 [1,0]:INFO:root:Epoch[9] Batch[1200] Loss[3.867] [1,0]:INFO:root:Epoch[9] Rank[0] Batch[1200] rmse=0.022685 lr=0.399656 [1,0]:INFO:root:Epoch[9] Rank[0] Batch[1251] Time cost=398.21 Train-metric=0.022700 [1,0]:INFO:root:Epoch[9] Speed: 3216.93 samples/sec [1,0]:INFO:root:Epoch[9] Rank[0] Validation-accuracy=0.477440 Validation-top_k_accuracy_5=0.736700 [1,0]:INFO:root:Epoch[10] Batch[100] Loss[3.537] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[100] rmse=0.022510 lr=0.399639 [1,0]:INFO:root:Epoch[10] Batch[200] Loss[3.451] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[200] rmse=0.022540 lr=0.399627 [1,0]:INFO:root:Epoch[10] Batch[300] Loss[3.211] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[300] rmse=0.022558 lr=0.399615 [1,0]:INFO:root:Epoch[10] Batch[400] Loss[3.461] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[400] rmse=0.022615 lr=0.399604 [1,0]:INFO:root:Epoch[10] Batch[500] Loss[3.595] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[500] rmse=0.022633 lr=0.399592 [1,0]:INFO:root:Epoch[10] Batch[600] Loss[4.230] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[600] rmse=0.022666 lr=0.399579 [1,0]:INFO:root:Epoch[10] Batch[700] Loss[3.641] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[700] rmse=0.022649 lr=0.399567 [1,0]:INFO:root:Epoch[10] Batch[800] Loss[3.503] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[800] rmse=0.022657 lr=0.399555 [1,0]:INFO:root:Epoch[10] Batch[900] Loss[3.414] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[900] rmse=0.022651 lr=0.399542 [1,0]:INFO:root:Epoch[10] Batch[1000] Loss[3.552] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[1000] rmse=0.022616 lr=0.399529 [1,0]:INFO:root:Epoch[10] Batch[1100] Loss[3.272] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[1100] rmse=0.022630 lr=0.399516 [1,0]:INFO:root:Epoch[10] Batch[1200] Loss[5.053] [1,0]:INFO:root:Epoch[10] Rank[0] Batch[1200] rmse=0.022605 lr=0.399503 [1,0]:INFO:root:Epoch[10] Rank[0] Batch[1251] Time cost=401.45 Train-metric=0.022596 [1,0]:INFO:root:Epoch[10] Speed: 3190.99 samples/sec [1,0]:INFO:root:Epoch[11] Batch[100] Loss[3.316] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[100] rmse=0.022474 lr=0.399482 [1,0]:INFO:root:Epoch[11] Batch[200] Loss[3.661] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[200] rmse=0.022433 lr=0.399469 [1,0]:INFO:root:Epoch[11] Batch[300] Loss[3.185] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[300] rmse=0.022512 lr=0.399455 [1,0]:INFO:root:Epoch[11] Batch[400] Loss[3.373] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[400] rmse=0.022498 lr=0.399441 [1,0]:INFO:root:Epoch[11] Batch[500] Loss[3.382] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[500] rmse=0.022454 lr=0.399426 [1,0]:INFO:root:Epoch[11] Batch[600] Loss[3.227] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[600] rmse=0.022425 lr=0.399412 [1,0]:INFO:root:Epoch[11] Batch[700] Loss[5.718] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[700] rmse=0.022460 lr=0.399397 [1,0]:INFO:root:Epoch[11] Batch[800] Loss[3.314] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[800] rmse=0.022441 lr=0.399383 [1,0]:INFO:root:Epoch[11] Batch[900] Loss[3.478] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[900] rmse=0.022465 lr=0.399368 [1,0]:INFO:root:Epoch[11] Batch[1000] Loss[3.298] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[1000] rmse=0.022444 lr=0.399352 [1,0]:INFO:root:Epoch[11] Batch[1100] Loss[3.302] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[1100] rmse=0.022459 lr=0.399337 [1,0]:INFO:root:Epoch[11] Batch[1200] Loss[5.339] [1,0]:INFO:root:Epoch[11] Rank[0] Batch[1200] rmse=0.022443 lr=0.399322 [1,0]:INFO:root:Epoch[11] Rank[0] Batch[1251] Time cost=398.20 Train-metric=0.022444 [1,0]:INFO:root:Epoch[11] Speed: 3217.05 samples/sec [1,0]:INFO:root:Epoch[12] Batch[100] Loss[3.303] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[100] rmse=0.022407 lr=0.399298 [1,0]:INFO:root:Epoch[12] Batch[200] Loss[3.812] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[200] rmse=0.022429 lr=0.399282 [1,0]:INFO:root:Epoch[12] Batch[300] Loss[3.042] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[300] rmse=0.022403 lr=0.399266 [1,0]:INFO:root:Epoch[12] Batch[400] Loss[3.392] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[400] rmse=0.022333 lr=0.399249 [1,0]:INFO:root:Epoch[12] Batch[500] Loss[3.049] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[500] rmse=0.022420 lr=0.399233 [1,0]:INFO:root:Epoch[12] Batch[600] Loss[3.536] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[600] rmse=0.022398 lr=0.399216 [1,0]:INFO:root:Epoch[12] Batch[700] Loss[3.481] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[700] rmse=0.022381 lr=0.399200 [1,0]:INFO:root:Epoch[12] Batch[800] Loss[3.579] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[800] rmse=0.022397 lr=0.399182 [1,0]:INFO:root:Epoch[12] Batch[900] Loss[5.378] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[900] rmse=0.022395 lr=0.399165 [1,0]:INFO:root:Epoch[12] Batch[1000] Loss[3.516] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[1000] rmse=0.022421 lr=0.399148 [1,0]:INFO:root:Epoch[12] Batch[1100] Loss[3.380] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[1100] rmse=0.022440 lr=0.399130 [1,0]:INFO:root:Epoch[12] Batch[1200] Loss[3.275] [1,0]:INFO:root:Epoch[12] Rank[0] Batch[1200] rmse=0.022440 lr=0.399113 [1,0]:INFO:root:Epoch[12] Rank[0] Batch[1251] Time cost=398.92 Train-metric=0.022427 [1,0]:INFO:root:Epoch[12] Speed: 3211.23 samples/sec [1,0]:INFO:root:Epoch[13] Batch[100] Loss[5.545] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[100] rmse=0.022518 lr=0.399086 [1,0]:INFO:root:Epoch[13] Batch[200] Loss[3.106] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[200] rmse=0.022432 lr=0.399067 [1,0]:INFO:root:Epoch[13] Batch[300] Loss[4.162] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[300] rmse=0.022425 lr=0.399049 [1,0]:INFO:root:Epoch[13] Batch[400] Loss[3.275] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[400] rmse=0.022408 lr=0.399030 [1,0]:INFO:root:Epoch[13] Batch[500] Loss[3.476] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[500] rmse=0.022400 lr=0.399012 [1,0]:INFO:root:Epoch[13] Batch[600] Loss[3.433] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[600] rmse=0.022388 lr=0.398993 [1,0]:INFO:root:Epoch[13] Batch[700] Loss[5.330] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[700] rmse=0.022357 lr=0.398974 [1,0]:INFO:root:Epoch[13] Batch[800] Loss[3.449] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[800] rmse=0.022392 lr=0.398955 [1,0]:INFO:root:Epoch[13] Batch[900] Loss[4.113] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[900] rmse=0.022427 lr=0.398935 [1,0]:INFO:root:Epoch[13] Batch[1000] Loss[5.247] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[1000] rmse=0.022413 lr=0.398916 [1,0]:INFO:root:Epoch[13] Batch[1100] Loss[4.209] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[1100] rmse=0.022366 lr=0.398896 [1,0]:INFO:root:Epoch[13] Batch[1200] Loss[3.249] [1,0]:INFO:root:Epoch[13] Rank[0] Batch[1200] rmse=0.022372 lr=0.398876 [1,0]:INFO:root:Epoch[13] Rank[0] Batch[1251] Time cost=399.78 Train-metric=0.022372 [1,0]:INFO:root:Epoch[13] Speed: 3204.29 samples/sec [1,0]:INFO:root:Epoch[14] Batch[100] Loss[3.286] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[100] rmse=0.022107 lr=0.398845 [1,0]:INFO:root:Epoch[14] Batch[200] Loss[3.825] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[200] rmse=0.022128 lr=0.398825 [1,0]:INFO:root:Epoch[14] Batch[300] Loss[3.802] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[300] rmse=0.022189 lr=0.398804 [1,0]:INFO:root:Epoch[14] Batch[400] Loss[4.039] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[400] rmse=0.022144 lr=0.398784 [1,0]:INFO:root:Epoch[14] Batch[500] Loss[3.356] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[500] rmse=0.022214 lr=0.398763 [1,0]:INFO:root:Epoch[14] Batch[600] Loss[3.274] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[600] rmse=0.022236 lr=0.398741 [1,0]:INFO:root:Epoch[14] Batch[700] Loss[3.259] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[700] rmse=0.022243 lr=0.398720 [1,0]:INFO:root:Epoch[14] Batch[800] Loss[3.648] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[800] rmse=0.022238 lr=0.398699 [1,0]:INFO:root:Epoch[14] Batch[900] Loss[3.723] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[900] rmse=0.022239 lr=0.398677 [1,0]:INFO:root:Epoch[14] Batch[1000] Loss[5.552] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[1000] rmse=0.022243 lr=0.398655 [1,0]:INFO:root:Epoch[14] Batch[1100] Loss[3.233] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[1100] rmse=0.022212 lr=0.398633 [1,0]:INFO:root:Epoch[14] Batch[1200] Loss[5.406] [1,0]:INFO:root:Epoch[14] Rank[0] Batch[1200] rmse=0.022197 lr=0.398611 [1,0]:INFO:root:Epoch[14] Rank[0] Batch[1251] Time cost=399.10 Train-metric=0.022203 [1,0]:INFO:root:Epoch[14] Speed: 3209.79 samples/sec [1,0]:INFO:root:Epoch[14] Rank[0] Validation-accuracy=0.515760 Validation-top_k_accuracy_5=0.768100 [1,0]:INFO:root:Epoch[15] Batch[100] Loss[3.201] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[100] rmse=0.022317 lr=0.398577 [1,0]:INFO:root:Epoch[15] Batch[200] Loss[3.376] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[200] rmse=0.022214 lr=0.398554 [1,0]:INFO:root:Epoch[15] Batch[300] Loss[3.470] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[300] rmse=0.022243 lr=0.398532 [1,0]:INFO:root:Epoch[15] Batch[400] Loss[3.489] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[400] rmse=0.022248 lr=0.398509 [1,0]:INFO:root:Epoch[15] Batch[500] Loss[3.095] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[500] rmse=0.022209 lr=0.398485 [1,0]:INFO:root:Epoch[15] Batch[600] Loss[4.500] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[600] rmse=0.022174 lr=0.398462 [1,0]:INFO:root:Epoch[15] Batch[700] Loss[3.170] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[700] rmse=0.022161 lr=0.398439 [1,0]:INFO:root:Epoch[15] Batch[800] Loss[3.654] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[800] rmse=0.022142 lr=0.398415 [1,0]:INFO:root:Epoch[15] Batch[900] Loss[3.215] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[900] rmse=0.022121 lr=0.398391 [1,0]:INFO:root:Epoch[15] Batch[1000] Loss[3.682] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[1000] rmse=0.022135 lr=0.398367 [1,0]:INFO:root:Epoch[15] Batch[1100] Loss[5.445] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[1100] rmse=0.022123 lr=0.398343 [1,0]:INFO:root:Epoch[15] Batch[1200] Loss[3.763] [1,0]:INFO:root:Epoch[15] Rank[0] Batch[1200] rmse=0.022127 lr=0.398318 [1,0]:INFO:root:Epoch[15] Rank[0] Batch[1251] Time cost=401.09 Train-metric=0.022139 [1,0]:INFO:root:Epoch[15] Speed: 3193.85 samples/sec [1,0]:INFO:root:Epoch[16] Batch[100] Loss[3.150] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[100] rmse=0.022021 lr=0.398281 [1,0]:INFO:root:Epoch[16] Batch[200] Loss[3.884] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[200] rmse=0.021990 lr=0.398256 [1,0]:INFO:root:Epoch[16] Batch[300] Loss[5.601] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[300] rmse=0.022036 lr=0.398231 [1,0]:INFO:root:Epoch[16] Batch[400] Loss[3.017] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[400] rmse=0.022083 lr=0.398206 [1,0]:INFO:root:Epoch[16] Batch[500] Loss[4.853] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[500] rmse=0.022101 lr=0.398181 [1,0]:INFO:root:Epoch[16] Batch[600] Loss[3.438] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[600] rmse=0.022071 lr=0.398155 [1,0]:INFO:root:Epoch[16] Batch[700] Loss[3.504] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[700] rmse=0.022116 lr=0.398129 [1,0]:INFO:root:Epoch[16] Batch[800] Loss[4.341] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[800] rmse=0.022123 lr=0.398103 [1,0]:INFO:root:Epoch[16] Batch[900] Loss[3.437] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[900] rmse=0.022145 lr=0.398077 [1,0]:INFO:root:Epoch[16] Batch[1000] Loss[5.461] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[1000] rmse=0.022107 lr=0.398051 [1,0]:INFO:root:Epoch[16] Batch[1100] Loss[3.206] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[1100] rmse=0.022097 lr=0.398024 [1,0]:INFO:root:Epoch[16] Batch[1200] Loss[3.420] [1,0]:INFO:root:Epoch[16] Rank[0] Batch[1200] rmse=0.022095 lr=0.397998 [1,0]:INFO:root:Epoch[16] Rank[0] Batch[1251] Time cost=400.70 Train-metric=0.022101 [1,0]:INFO:root:Epoch[16] Speed: 3196.96 samples/sec [1,0]:INFO:root:Epoch[17] Batch[100] Loss[3.242] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[100] rmse=0.021988 lr=0.397957 [1,0]:INFO:root:Epoch[17] Batch[200] Loss[3.223] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[200] rmse=0.022069 lr=0.397930 [1,0]:INFO:root:Epoch[17] Batch[300] Loss[4.792] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[300] rmse=0.022083 lr=0.397903 [1,0]:INFO:root:Epoch[17] Batch[400] Loss[4.180] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[400] rmse=0.022067 lr=0.397875 [1,0]:INFO:root:Epoch[17] Batch[500] Loss[4.909] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[500] rmse=0.022020 lr=0.397848 [1,0]:INFO:root:Epoch[17] Batch[600] Loss[3.771] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[600] rmse=0.021950 lr=0.397820 [1,0]:INFO:root:Epoch[17] Batch[700] Loss[3.207] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[700] rmse=0.021988 lr=0.397792 [1,0]:INFO:root:Epoch[17] Batch[800] Loss[5.239] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[800] rmse=0.022004 lr=0.397764 [1,0]:INFO:root:Epoch[17] Batch[900] Loss[3.770] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[900] rmse=0.021979 lr=0.397736 [1,0]:INFO:root:Epoch[17] Batch[1000] Loss[3.259] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[1000] rmse=0.021994 lr=0.397707 [1,0]:INFO:root:Epoch[17] Batch[1100] Loss[3.649] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[1100] rmse=0.021984 lr=0.397678 [1,0]:INFO:root:Epoch[17] Batch[1200] Loss[3.263] [1,0]:INFO:root:Epoch[17] Rank[0] Batch[1200] rmse=0.021987 lr=0.397650 [1,0]:INFO:root:Epoch[17] Rank[0] Batch[1251] Time cost=399.92 Train-metric=0.022010 [1,0]:INFO:root:Epoch[17] Speed: 3203.17 samples/sec [1,0]:INFO:root:Epoch[18] Batch[100] Loss[3.043] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[100] rmse=0.021863 lr=0.397606 [1,0]:INFO:root:Epoch[18] Batch[200] Loss[3.053] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[200] rmse=0.022005 lr=0.397576 [1,0]:INFO:root:Epoch[18] Batch[300] Loss[2.985] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[300] rmse=0.021939 lr=0.397547 [1,0]:INFO:root:Epoch[18] Batch[400] Loss[4.045] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[400] rmse=0.021948 lr=0.397517 [1,0]:INFO:root:Epoch[18] Batch[500] Loss[5.039] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[500] rmse=0.021975 lr=0.397487 [1,0]:INFO:root:Epoch[18] Batch[600] Loss[2.967] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[600] rmse=0.021975 lr=0.397457 [1,0]:INFO:root:Epoch[18] Batch[700] Loss[3.036] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[700] rmse=0.021987 lr=0.397427 [1,0]:INFO:root:Epoch[18] Batch[800] Loss[3.749] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[800] rmse=0.021974 lr=0.397397 [1,0]:INFO:root:Epoch[18] Batch[900] Loss[3.229] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[900] rmse=0.021985 lr=0.397366 [1,0]:INFO:root:Epoch[18] Batch[1000] Loss[5.431] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[1000] rmse=0.022002 lr=0.397336 [1,0]:INFO:root:Epoch[18] Batch[1100] Loss[3.071] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[1100] rmse=0.022003 lr=0.397305 [1,0]:INFO:root:Epoch[18] Batch[1200] Loss[4.766] [1,0]:INFO:root:Epoch[18] Rank[0] Batch[1200] rmse=0.021982 lr=0.397274 [1,0]:INFO:root:Epoch[18] Rank[0] Batch[1251] Time cost=398.49 Train-metric=0.021985 [1,0]:INFO:root:Epoch[18] Speed: 3214.67 samples/sec [1,0]:INFO:root:Epoch[19] Batch[100] Loss[4.358] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[100] rmse=0.021834 lr=0.397226 [1,0]:INFO:root:Epoch[19] Batch[200] Loss[3.482] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[200] rmse=0.021800 lr=0.397195 [1,0]:INFO:root:Epoch[19] Batch[300] Loss[5.413] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[300] rmse=0.021915 lr=0.397163 [1,0]:INFO:root:Epoch[19] Batch[400] Loss[3.350] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[400] rmse=0.021878 lr=0.397131 [1,0]:INFO:root:Epoch[19] Batch[500] Loss[3.488] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[500] rmse=0.021988 lr=0.397099 [1,0]:INFO:root:Epoch[19] Batch[600] Loss[4.054] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[600] rmse=0.021979 lr=0.397067 [1,0]:INFO:root:Epoch[19] Batch[700] Loss[4.175] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[700] rmse=0.021950 lr=0.397035 [1,0]:INFO:root:Epoch[19] Batch[800] Loss[5.061] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[800] rmse=0.021959 lr=0.397002 [1,0]:INFO:root:Epoch[19] Batch[900] Loss[3.687] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[900] rmse=0.021938 lr=0.396969 [1,0]:INFO:root:Epoch[19] Batch[1000] Loss[4.217] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[1000] rmse=0.021934 lr=0.396936 [1,0]:INFO:root:Epoch[19] Batch[1100] Loss[3.307] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[1100] rmse=0.021942 lr=0.396903 [1,0]:INFO:root:Epoch[19] Batch[1200] Loss[5.301] [1,0]:INFO:root:Epoch[19] Rank[0] Batch[1200] rmse=0.021950 lr=0.396870 [1,0]:INFO:root:Epoch[19] Rank[0] Batch[1251] Time cost=398.56 Train-metric=0.021955 [1,0]:INFO:root:Epoch[19] Speed: 3214.15 samples/sec [1,0]:INFO:root:Epoch[19] Rank[0] Validation-accuracy=0.520360 Validation-top_k_accuracy_5=0.771700 [1,0]:INFO:root:Epoch[20] Batch[100] Loss[5.320] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[100] rmse=0.021871 lr=0.396819 [1,0]:INFO:root:Epoch[20] Batch[200] Loss[4.256] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[200] rmse=0.021810 lr=0.396786 [1,0]:INFO:root:Epoch[20] Batch[300] Loss[3.095] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[300] rmse=0.021807 lr=0.396752 [1,0]:INFO:root:Epoch[20] Batch[400] Loss[4.537] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[400] rmse=0.021855 lr=0.396718 [1,0]:INFO:root:Epoch[20] Batch[500] Loss[3.472] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[500] rmse=0.021861 lr=0.396683 [1,0]:INFO:root:Epoch[20] Batch[600] Loss[3.335] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[600] rmse=0.021881 lr=0.396649 [1,0]:INFO:root:Epoch[20] Batch[700] Loss[3.083] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[700] rmse=0.021902 lr=0.396614 [1,0]:INFO:root:Epoch[20] Batch[800] Loss[2.994] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[800] rmse=0.021921 lr=0.396579 [1,0]:INFO:root:Epoch[20] Batch[900] Loss[3.718] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[900] rmse=0.021925 lr=0.396544 [1,0]:INFO:root:Epoch[20] Batch[1000] Loss[5.136] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[1000] rmse=0.021916 lr=0.396509 [1,0]:INFO:root:Epoch[20] Batch[1100] Loss[2.796] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[1100] rmse=0.021905 lr=0.396474 [1,0]:INFO:root:Epoch[20] Batch[1200] Loss[3.127] [1,0]:INFO:root:Epoch[20] Rank[0] Batch[1200] rmse=0.021907 lr=0.396439 [1,0]:INFO:root:Epoch[20] Rank[0] Batch[1251] Time cost=398.88 Train-metric=0.021911 [1,0]:INFO:root:Epoch[20] Speed: 3211.57 samples/sec [1,0]:INFO:root:Epoch[21] Batch[100] Loss[2.865] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[100] rmse=0.021725 lr=0.396385 [1,0]:INFO:root:Epoch[21] Batch[200] Loss[4.129] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[200] rmse=0.021709 lr=0.396349 [1,0]:INFO:root:Epoch[21] Batch[300] Loss[3.403] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[300] rmse=0.021724 lr=0.396313 [1,0]:INFO:root:Epoch[21] Batch[400] Loss[3.354] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[400] rmse=0.021786 lr=0.396276 [1,0]:INFO:root:Epoch[21] Batch[500] Loss[4.422] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[500] rmse=0.021802 lr=0.396240 [1,0]:INFO:root:Epoch[21] Batch[600] Loss[4.552] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[600] rmse=0.021832 lr=0.396203 [1,0]:INFO:root:Epoch[21] Batch[700] Loss[5.124] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[700] rmse=0.021880 lr=0.396166 [1,0]:INFO:root:Epoch[21] Batch[800] Loss[3.032] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[800] rmse=0.021924 lr=0.396129 [1,0]:INFO:root:Epoch[21] Batch[900] Loss[4.796] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[900] rmse=0.021964 lr=0.396092 [1,0]:INFO:root:Epoch[21] Batch[1000] Loss[3.081] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[1000] rmse=0.021949 lr=0.396055 [1,0]:INFO:root:Epoch[21] Batch[1100] Loss[4.386] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[1100] rmse=0.021940 lr=0.396017 [1,0]:INFO:root:Epoch[21] Batch[1200] Loss[3.084] [1,0]:INFO:root:Epoch[21] Rank[0] Batch[1200] rmse=0.021947 lr=0.395980 [1,0]:INFO:root:Epoch[21] Rank[0] Batch[1251] Time cost=399.05 Train-metric=0.021936 [1,0]:INFO:root:Epoch[21] Speed: 3210.16 samples/sec [1,0]:INFO:root:Epoch[22] Batch[100] Loss[4.694] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[100] rmse=0.021702 lr=0.395922 [1,0]:INFO:root:Epoch[22] Batch[200] Loss[3.172] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[200] rmse=0.021783 lr=0.395884 [1,0]:INFO:root:Epoch[22] Batch[300] Loss[5.290] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[300] rmse=0.021784 lr=0.395846 [1,0]:INFO:root:Epoch[22] Batch[400] Loss[5.300] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[400] rmse=0.021820 lr=0.395807 [1,0]:INFO:root:Epoch[22] Batch[500] Loss[5.284] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[500] rmse=0.021811 lr=0.395769 [1,0]:INFO:root:Epoch[22] Batch[600] Loss[2.996] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[600] rmse=0.021833 lr=0.395730 [1,0]:INFO:root:Epoch[22] Batch[700] Loss[5.554] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[700] rmse=0.021843 lr=0.395691 [1,0]:INFO:root:Epoch[22] Batch[800] Loss[3.022] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[800] rmse=0.021845 lr=0.395652 [1,0]:INFO:root:Epoch[22] Batch[900] Loss[5.453] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[900] rmse=0.021843 lr=0.395612 [1,0]:INFO:root:Epoch[22] Batch[1000] Loss[2.950] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[1000] rmse=0.021851 lr=0.395573 [1,0]:INFO:root:Epoch[22] Batch[1100] Loss[3.818] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[1100] rmse=0.021845 lr=0.395533 [1,0]:INFO:root:Epoch[22] Batch[1200] Loss[3.454] [1,0]:INFO:root:Epoch[22] Rank[0] Batch[1200] rmse=0.021867 lr=0.395493 [1,0]:INFO:root:Epoch[22] Rank[0] Batch[1251] Time cost=398.56 Train-metric=0.021867 [1,0]:INFO:root:Epoch[22] Speed: 3214.13 samples/sec [1,0]:INFO:root:Epoch[23] Batch[100] Loss[2.999] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[100] rmse=0.022039 lr=0.395433 [1,0]:INFO:root:Epoch[23] Batch[200] Loss[5.396] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[200] rmse=0.021939 lr=0.395392 [1,0]:INFO:root:Epoch[23] Batch[300] Loss[3.292] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[300] rmse=0.021934 lr=0.395352 [1,0]:INFO:root:Epoch[23] Batch[400] Loss[3.039] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[400] rmse=0.021894 lr=0.395311 [1,0]:INFO:root:Epoch[23] Batch[500] Loss[3.190] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[500] rmse=0.021860 lr=0.395270 [1,0]:INFO:root:Epoch[23] Batch[600] Loss[3.744] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[600] rmse=0.021849 lr=0.395229 [1,0]:INFO:root:Epoch[23] Batch[700] Loss[3.085] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[700] rmse=0.021849 lr=0.395188 [1,0]:INFO:root:Epoch[23] Batch[800] Loss[5.211] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[800] rmse=0.021861 lr=0.395147 [1,0]:INFO:root:Epoch[23] Batch[900] Loss[3.249] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[900] rmse=0.021846 lr=0.395105 [1,0]:INFO:root:Epoch[23] Batch[1000] Loss[3.691] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[1000] rmse=0.021847 lr=0.395063 [1,0]:INFO:root:Epoch[23] Batch[1100] Loss[3.050] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[1100] rmse=0.021846 lr=0.395022 [1,0]:INFO:root:Epoch[23] Batch[1200] Loss[3.155] [1,0]:INFO:root:Epoch[23] Rank[0] Batch[1200] rmse=0.021863 lr=0.394980 [1,0]:INFO:root:Epoch[23] Rank[0] Batch[1251] Time cost=399.75 Train-metric=0.021868 [1,0]:INFO:root:Epoch[23] Speed: 3204.53 samples/sec [1,0]:INFO:root:Epoch[24] Batch[100] Loss[5.036] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[100] rmse=0.021684 lr=0.394916 [1,0]:INFO:root:Epoch[24] Batch[200] Loss[3.987] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[200] rmse=0.021698 lr=0.394873 [1,0]:INFO:root:Epoch[24] Batch[300] Loss[3.061] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[300] rmse=0.021754 lr=0.394830 [1,0]:INFO:root:Epoch[24] Batch[400] Loss[5.363] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[400] rmse=0.021748 lr=0.394788 [1,0]:INFO:root:Epoch[24] Batch[500] Loss[3.378] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[500] rmse=0.021795 lr=0.394745 [1,0]:INFO:root:Epoch[24] Batch[600] Loss[3.448] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[600] rmse=0.021809 lr=0.394701 [1,0]:INFO:root:Epoch[24] Batch[700] Loss[3.718] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[700] rmse=0.021783 lr=0.394658 [1,0]:INFO:root:Epoch[24] Batch[800] Loss[3.143] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[800] rmse=0.021799 lr=0.394614 [1,0]:INFO:root:Epoch[24] Batch[900] Loss[3.238] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[900] rmse=0.021799 lr=0.394571 [1,0]:INFO:root:Epoch[24] Batch[1000] Loss[3.088] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[1000] rmse=0.021813 lr=0.394527 [1,0]:INFO:root:Epoch[24] Batch[1100] Loss[4.933] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[1100] rmse=0.021819 lr=0.394483 [1,0]:INFO:root:Epoch[24] Batch[1200] Loss[3.643] [1,0]:INFO:root:Epoch[24] Rank[0] Batch[1200] rmse=0.021828 lr=0.394438 [1,0]:INFO:root:Epoch[24] Rank[0] Batch[1251] Time cost=397.96 Train-metric=0.021828 [1,0]:INFO:root:Epoch[24] Speed: 3218.94 samples/sec [1,0]:INFO:root:Epoch[24] Rank[0] Validation-accuracy=0.531960 Validation-top_k_accuracy_5=0.786180 [1,0]:INFO:root:Epoch[25] Batch[100] Loss[3.232] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[100] rmse=0.021801 lr=0.394371 [1,0]:INFO:root:Epoch[25] Batch[200] Loss[3.391] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[200] rmse=0.021717 lr=0.394326 [1,0]:INFO:root:Epoch[25] Batch[300] Loss[5.224] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[300] rmse=0.021775 lr=0.394282 [1,0]:INFO:root:Epoch[25] Batch[400] Loss[3.211] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[400] rmse=0.021759 lr=0.394237 [1,0]:INFO:root:Epoch[25] Batch[500] Loss[3.630] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[500] rmse=0.021729 lr=0.394191 [1,0]:INFO:root:Epoch[25] Batch[600] Loss[4.279] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[600] rmse=0.021727 lr=0.394146 [1,0]:INFO:root:Epoch[25] Batch[700] Loss[3.589] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[700] rmse=0.021715 lr=0.394100 [1,0]:INFO:root:Epoch[25] Batch[800] Loss[4.696] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[800] rmse=0.021719 lr=0.394055 [1,0]:INFO:root:Epoch[25] Batch[900] Loss[3.439] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[900] rmse=0.021745 lr=0.394009 [1,0]:INFO:root:Epoch[25] Batch[1000] Loss[3.385] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[1000] rmse=0.021767 lr=0.393963 [1,0]:INFO:root:Epoch[25] Batch[1100] Loss[3.410] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[1100] rmse=0.021769 lr=0.393916 [1,0]:INFO:root:Epoch[25] Batch[1200] Loss[3.544] [1,0]:INFO:root:Epoch[25] Rank[0] Batch[1200] rmse=0.021775 lr=0.393870 [1,0]:INFO:root:Epoch[25] Rank[0] Batch[1251] Time cost=400.61 Train-metric=0.021783 [1,0]:INFO:root:Epoch[25] Speed: 3197.65 samples/sec [1,0]:INFO:root:Epoch[26] Batch[100] Loss[4.576] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[100] rmse=0.021780 lr=0.393799 [1,0]:INFO:root:Epoch[26] Batch[200] Loss[4.336] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[200] rmse=0.021708 lr=0.393753 [1,0]:INFO:root:Epoch[26] Batch[300] Loss[3.887] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[300] rmse=0.021696 lr=0.393706 [1,0]:INFO:root:Epoch[26] Batch[400] Loss[3.050] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[400] rmse=0.021691 lr=0.393658 [1,0]:INFO:root:Epoch[26] Batch[500] Loss[3.177] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[500] rmse=0.021727 lr=0.393611 [1,0]:INFO:root:Epoch[26] Batch[600] Loss[3.246] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[600] rmse=0.021715 lr=0.393563 [1,0]:INFO:root:Epoch[26] Batch[700] Loss[3.092] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[700] rmse=0.021703 lr=0.393516 [1,0]:INFO:root:Epoch[26] Batch[800] Loss[4.335] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[800] rmse=0.021717 lr=0.393468 [1,0]:INFO:root:Epoch[26] Batch[900] Loss[3.080] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[900] rmse=0.021710 lr=0.393420 [1,0]:INFO:root:Epoch[26] Batch[1000] Loss[4.177] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[1000] rmse=0.021726 lr=0.393371 [1,0]:INFO:root:Epoch[26] Batch[1100] Loss[3.319] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[1100] rmse=0.021732 lr=0.393323 [1,0]:INFO:root:Epoch[26] Batch[1200] Loss[5.379] [1,0]:INFO:root:Epoch[26] Rank[0] Batch[1200] rmse=0.021722 lr=0.393274 [1,0]:INFO:root:Epoch[26] Rank[0] Batch[1251] Time cost=401.40 Train-metric=0.021729 [1,0]:INFO:root:Epoch[26] Speed: 3191.40 samples/sec [1,0]:INFO:root:Epoch[27] Batch[100] Loss[3.422] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[100] rmse=0.021691 lr=0.393201 [1,0]:INFO:root:Epoch[27] Batch[200] Loss[3.013] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[200] rmse=0.021716 lr=0.393151 [1,0]:INFO:root:Epoch[27] Batch[300] Loss[4.942] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[300] rmse=0.021790 lr=0.393102 [1,0]:INFO:root:Epoch[27] Batch[400] Loss[2.949] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[400] rmse=0.021801 lr=0.393053 [1,0]:INFO:root:Epoch[27] Batch[500] Loss[5.223] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[500] rmse=0.021791 lr=0.393003 [1,0]:INFO:root:Epoch[27] Batch[600] Loss[3.337] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[600] rmse=0.021785 lr=0.392954 [1,0]:INFO:root:Epoch[27] Batch[700] Loss[4.276] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[700] rmse=0.021793 lr=0.392904 [1,0]:INFO:root:Epoch[27] Batch[800] Loss[3.160] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[800] rmse=0.021771 lr=0.392854 [1,0]:INFO:root:Epoch[27] Batch[900] Loss[3.163] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[900] rmse=0.021785 lr=0.392803 [1,0]:INFO:root:Epoch[27] Batch[1000] Loss[4.486] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[1000] rmse=0.021788 lr=0.392753 [1,0]:INFO:root:Epoch[27] Batch[1100] Loss[3.190] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[1100] rmse=0.021798 lr=0.392702 [1,0]:INFO:root:Epoch[27] Batch[1200] Loss[3.578] [1,0]:INFO:root:Epoch[27] Rank[0] Batch[1200] rmse=0.021798 lr=0.392651 [1,0]:INFO:root:Epoch[27] Rank[0] Batch[1251] Time cost=401.40 Train-metric=0.021795 [1,0]:INFO:root:Epoch[27] Speed: 3191.42 samples/sec [1,0]:INFO:root:Epoch[28] Batch[100] Loss[5.269] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[100] rmse=0.021677 lr=0.392574 [1,0]:INFO:root:Epoch[28] Batch[200] Loss[2.825] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[200] rmse=0.021690 lr=0.392523 [1,0]:INFO:root:Epoch[28] Batch[300] Loss[3.059] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[300] rmse=0.021718 lr=0.392472 [1,0]:INFO:root:Epoch[28] Batch[400] Loss[2.957] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[400] rmse=0.021699 lr=0.392420 [1,0]:INFO:root:Epoch[28] Batch[500] Loss[5.123] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[500] rmse=0.021707 lr=0.392369 [1,0]:INFO:root:Epoch[28] Batch[600] Loss[3.092] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[600] rmse=0.021719 lr=0.392317 [1,0]:INFO:root:Epoch[28] Batch[700] Loss[2.837] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[700] rmse=0.021707 lr=0.392265 [1,0]:INFO:root:Epoch[28] Batch[800] Loss[3.152] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[800] rmse=0.021726 lr=0.392212 [1,0]:INFO:root:Epoch[28] Batch[900] Loss[4.948] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[900] rmse=0.021745 lr=0.392160 [1,0]:INFO:root:Epoch[28] Batch[1000] Loss[3.132] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[1000] rmse=0.021750 lr=0.392107 [1,0]:INFO:root:Epoch[28] Batch[1100] Loss[3.140] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[1100] rmse=0.021757 lr=0.392055 [1,0]:INFO:root:Epoch[28] Batch[1200] Loss[4.988] [1,0]:INFO:root:Epoch[28] Rank[0] Batch[1200] rmse=0.021743 lr=0.392002 [1,0]:INFO:root:Epoch[28] Rank[0] Batch[1251] Time cost=399.29 Train-metric=0.021753 [1,0]:INFO:root:Epoch[28] Speed: 3208.22 samples/sec [1,0]:INFO:root:Epoch[29] Batch[100] Loss[3.385] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[100] rmse=0.021609 lr=0.391921 [1,0]:INFO:root:Epoch[29] Batch[200] Loss[3.850] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[200] rmse=0.021547 lr=0.391868 [1,0]:INFO:root:Epoch[29] Batch[300] Loss[3.472] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[300] rmse=0.021542 lr=0.391815 [1,0]:INFO:root:Epoch[29] Batch[400] Loss[4.938] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[400] rmse=0.021587 lr=0.391761 [1,0]:INFO:root:Epoch[29] Batch[500] Loss[2.865] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[500] rmse=0.021614 lr=0.391707 [1,0]:INFO:root:Epoch[29] Batch[600] Loss[3.397] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[600] rmse=0.021649 lr=0.391653 [1,0]:INFO:root:Epoch[29] Batch[700] Loss[2.957] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[700] rmse=0.021642 lr=0.391599 [1,0]:INFO:root:Epoch[29] Batch[800] Loss[5.390] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[800] rmse=0.021642 lr=0.391544 [1,0]:INFO:root:Epoch[29] Batch[900] Loss[3.185] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[900] rmse=0.021666 lr=0.391490 [1,0]:INFO:root:Epoch[29] Batch[1000] Loss[4.188] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[1000] rmse=0.021668 lr=0.391435 [1,0]:INFO:root:Epoch[29] Batch[1100] Loss[3.405] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[1100] rmse=0.021664 lr=0.391380 [1,0]:INFO:root:Epoch[29] Batch[1200] Loss[5.060] [1,0]:INFO:root:Epoch[29] Rank[0] Batch[1200] rmse=0.021672 lr=0.391325 [1,0]:INFO:root:Epoch[29] Rank[0] Batch[1251] Time cost=399.33 Train-metric=0.021676 [1,0]:INFO:root:Epoch[29] Speed: 3207.90 samples/sec [1,0]:INFO:root:Epoch[29] Rank[0] Validation-accuracy=0.528540 Validation-top_k_accuracy_5=0.779280 [1,0]:INFO:root:Epoch[30] Batch[100] Loss[3.367] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[100] rmse=0.021622 lr=0.391241 [1,0]:INFO:root:Epoch[30] Batch[200] Loss[3.122] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[200] rmse=0.021615 lr=0.391186 [1,0]:INFO:root:Epoch[30] Batch[300] Loss[3.229] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[300] rmse=0.021658 lr=0.391130 [1,0]:INFO:root:Epoch[30] Batch[400] Loss[3.281] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[400] rmse=0.021680 lr=0.391074 [1,0]:INFO:root:Epoch[30] Batch[500] Loss[2.971] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[500] rmse=0.021671 lr=0.391018 [1,0]:INFO:root:Epoch[30] Batch[600] Loss[3.153] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[600] rmse=0.021644 lr=0.390962 [1,0]:INFO:root:Epoch[30] Batch[700] Loss[3.551] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[700] rmse=0.021703 lr=0.390906 [1,0]:INFO:root:Epoch[30] Batch[800] Loss[5.289] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[800] rmse=0.021697 lr=0.390849 [1,0]:INFO:root:Epoch[30] Batch[900] Loss[2.833] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[900] rmse=0.021680 lr=0.390793 [1,0]:INFO:root:Epoch[30] Batch[1000] Loss[4.397] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[1000] rmse=0.021655 lr=0.390736 [1,0]:INFO:root:Epoch[30] Batch[1100] Loss[2.828] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[1100] rmse=0.021675 lr=0.390679 [1,0]:INFO:root:Epoch[30] Batch[1200] Loss[3.132] [1,0]:INFO:root:Epoch[30] Rank[0] Batch[1200] rmse=0.021669 lr=0.390621 [1,0]:INFO:root:Epoch[30] Rank[0] Batch[1251] Time cost=399.77 Train-metric=0.021684 [1,0]:INFO:root:Epoch[30] Speed: 3204.37 samples/sec [1,0]:INFO:root:Epoch[31] Batch[100] Loss[5.485] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[100] rmse=0.021377 lr=0.390535 [1,0]:INFO:root:Epoch[31] Batch[200] Loss[3.068] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[200] rmse=0.021335 lr=0.390477 [1,0]:INFO:root:Epoch[31] Batch[300] Loss[3.336] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[300] rmse=0.021432 lr=0.390419 [1,0]:INFO:root:Epoch[31] Batch[400] Loss[4.108] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[400] rmse=0.021479 lr=0.390361 [1,0]:INFO:root:Epoch[31] Batch[500] Loss[5.506] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[500] rmse=0.021523 lr=0.390303 [1,0]:INFO:root:Epoch[31] Batch[600] Loss[3.279] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[600] rmse=0.021554 lr=0.390245 [1,0]:INFO:root:Epoch[31] Batch[700] Loss[3.336] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[700] rmse=0.021600 lr=0.390186 [1,0]:INFO:root:Epoch[31] Batch[800] Loss[3.369] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[800] rmse=0.021625 lr=0.390127 [1,0]:INFO:root:Epoch[31] Batch[900] Loss[5.516] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[900] rmse=0.021647 lr=0.390069 [1,0]:INFO:root:Epoch[31] Batch[1000] Loss[2.971] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[1000] rmse=0.021668 lr=0.390010 [1,0]:INFO:root:Epoch[31] Batch[1100] Loss[5.113] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[1100] rmse=0.021671 lr=0.389950 [1,0]:INFO:root:Epoch[31] Batch[1200] Loss[3.086] [1,0]:INFO:root:Epoch[31] Rank[0] Batch[1200] rmse=0.021680 lr=0.389891 [1,0]:INFO:root:Epoch[31] Rank[0] Batch[1251] Time cost=398.77 Train-metric=0.021670 [1,0]:INFO:root:Epoch[31] Speed: 3212.43 samples/sec [1,0]:INFO:root:Epoch[32] Batch[100] Loss[2.988] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[100] rmse=0.021437 lr=0.389801 [1,0]:INFO:root:Epoch[32] Batch[200] Loss[5.326] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[200] rmse=0.021553 lr=0.389741 [1,0]:INFO:root:Epoch[32] Batch[300] Loss[3.390] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[300] rmse=0.021707 lr=0.389681 [1,0]:INFO:root:Epoch[32] Batch[400] Loss[3.074] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[400] rmse=0.021672 lr=0.389621 [1,0]:INFO:root:Epoch[32] Batch[500] Loss[3.141] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[500] rmse=0.021618 lr=0.389561 [1,0]:INFO:root:Epoch[32] Batch[600] Loss[4.619] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[600] rmse=0.021597 lr=0.389500 [1,0]:INFO:root:Epoch[32] Batch[700] Loss[4.071] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[700] rmse=0.021616 lr=0.389440 [1,0]:INFO:root:Epoch[32] Batch[800] Loss[3.171] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[800] rmse=0.021625 lr=0.389379 [1,0]:INFO:root:Epoch[32] Batch[900] Loss[3.087] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[900] rmse=0.021655 lr=0.389318 [1,0]:INFO:root:Epoch[32] Batch[1000] Loss[3.725] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[1000] rmse=0.021665 lr=0.389257 [1,0]:INFO:root:Epoch[32] Batch[1100] Loss[3.324] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[1100] rmse=0.021667 lr=0.389196 [1,0]:INFO:root:Epoch[32] Batch[1200] Loss[3.290] [1,0]:INFO:root:Epoch[32] Rank[0] Batch[1200] rmse=0.021673 lr=0.389134 [1,0]:INFO:root:Epoch[32] Rank[0] Batch[1251] Time cost=399.25 Train-metric=0.021692 [1,0]:INFO:root:Epoch[32] Speed: 3208.57 samples/sec [1,0]:INFO:root:Epoch[33] Batch[100] Loss[3.613] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[100] rmse=0.021639 lr=0.389041 [1,0]:INFO:root:Epoch[33] Batch[200] Loss[3.054] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[200] rmse=0.021776 lr=0.388979 [1,0]:INFO:root:Epoch[33] Batch[300] Loss[2.739] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[300] rmse=0.021620 lr=0.388917 [1,0]:INFO:root:Epoch[33] Batch[400] Loss[3.896] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[400] rmse=0.021627 lr=0.388855 [1,0]:INFO:root:Epoch[33] Batch[500] Loss[3.448] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[500] rmse=0.021665 lr=0.388792 [1,0]:INFO:root:Epoch[33] Batch[600] Loss[2.614] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[600] rmse=0.021650 lr=0.388730 [1,0]:INFO:root:Epoch[33] Batch[700] Loss[3.200] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[700] rmse=0.021649 lr=0.388667 [1,0]:INFO:root:Epoch[33] Batch[800] Loss[4.616] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[800] rmse=0.021658 lr=0.388604 [1,0]:INFO:root:Epoch[33] Batch[900] Loss[3.096] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[900] rmse=0.021667 lr=0.388541 [1,0]:INFO:root:Epoch[33] Batch[1000] Loss[3.212] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[1000] rmse=0.021645 lr=0.388478 [1,0]:INFO:root:Epoch[33] Batch[1100] Loss[4.747] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[1100] rmse=0.021634 lr=0.388414 [1,0]:INFO:root:Epoch[33] Batch[1200] Loss[5.062] [1,0]:INFO:root:Epoch[33] Rank[0] Batch[1200] rmse=0.021640 lr=0.388351 [1,0]:INFO:root:Epoch[33] Rank[0] Batch[1251] Time cost=400.03 Train-metric=0.021633 [1,0]:INFO:root:Epoch[33] Speed: 3202.30 samples/sec [1,0]:INFO:root:Epoch[34] Batch[100] Loss[3.578] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[100] rmse=0.021445 lr=0.388254 [1,0]:INFO:root:Epoch[34] Batch[200] Loss[3.028] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[200] rmse=0.021628 lr=0.388190 [1,0]:INFO:root:Epoch[34] Batch[300] Loss[5.139] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[300] rmse=0.021581 lr=0.388126 [1,0]:INFO:root:Epoch[34] Batch[400] Loss[3.252] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[400] rmse=0.021562 lr=0.388062 [1,0]:INFO:root:Epoch[34] Batch[500] Loss[2.988] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[500] rmse=0.021583 lr=0.387997 [1,0]:INFO:root:Epoch[34] Batch[600] Loss[3.402] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[600] rmse=0.021617 lr=0.387932 [1,0]:INFO:root:Epoch[34] Batch[700] Loss[3.462] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[700] rmse=0.021659 lr=0.387867 [1,0]:INFO:root:Epoch[34] Batch[800] Loss[4.733] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[800] rmse=0.021652 lr=0.387802 [1,0]:INFO:root:Epoch[34] Batch[900] Loss[2.974] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[900] rmse=0.021657 lr=0.387737 [1,0]:INFO:root:Epoch[34] Batch[1000] Loss[3.122] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[1000] rmse=0.021657 lr=0.387672 [1,0]:INFO:root:Epoch[34] Batch[1100] Loss[2.892] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[1100] rmse=0.021660 lr=0.387606 [1,0]:INFO:root:Epoch[34] Batch[1200] Loss[3.926] [1,0]:INFO:root:Epoch[34] Rank[0] Batch[1200] rmse=0.021658 lr=0.387541 [1,0]:INFO:root:Epoch[34] Rank[0] Batch[1251] Time cost=398.93 Train-metric=0.021650 [1,0]:INFO:root:Epoch[34] Speed: 3211.12 samples/sec [1,0]:INFO:root:Epoch[34] Rank[0] Validation-accuracy=0.527620 Validation-top_k_accuracy_5=0.777500 [1,0]:INFO:root:Epoch[35] Batch[100] Loss[3.109] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[100] rmse=0.021580 lr=0.387441 [1,0]:INFO:root:Epoch[35] Batch[200] Loss[3.179] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[200] rmse=0.021598 lr=0.387375 [1,0]:INFO:root:Epoch[35] Batch[300] Loss[4.290] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[300] rmse=0.021584 lr=0.387309 [1,0]:INFO:root:Epoch[35] Batch[400] Loss[5.357] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[400] rmse=0.021572 lr=0.387242 [1,0]:INFO:root:Epoch[35] Batch[500] Loss[4.644] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[500] rmse=0.021592 lr=0.387175 [1,0]:INFO:root:Epoch[35] Batch[600] Loss[4.162] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[600] rmse=0.021612 lr=0.387109 [1,0]:INFO:root:Epoch[35] Batch[700] Loss[3.281] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[700] rmse=0.021628 lr=0.387042 [1,0]:INFO:root:Epoch[35] Batch[800] Loss[2.848] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[800] rmse=0.021617 lr=0.386975 [1,0]:INFO:root:Epoch[35] Batch[900] Loss[3.067] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[900] rmse=0.021625 lr=0.386907 [1,0]:INFO:root:Epoch[35] Batch[1000] Loss[5.481] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[1000] rmse=0.021626 lr=0.386840 [1,0]:INFO:root:Epoch[35] Batch[1100] Loss[5.499] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[1100] rmse=0.021634 lr=0.386772 [1,0]:INFO:root:Epoch[35] Batch[1200] Loss[2.835] [1,0]:INFO:root:Epoch[35] Rank[0] Batch[1200] rmse=0.021643 lr=0.386704 [1,0]:INFO:root:Epoch[35] Rank[0] Batch[1251] Time cost=398.47 Train-metric=0.021646 [1,0]:INFO:root:Epoch[35] Speed: 3214.83 samples/sec [1,0]:INFO:root:Epoch[36] Batch[100] Loss[3.050] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[100] rmse=0.021742 lr=0.386602 [1,0]:INFO:root:Epoch[36] Batch[200] Loss[3.186] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[200] rmse=0.021698 lr=0.386533 [1,0]:INFO:root:Epoch[36] Batch[300] Loss[3.596] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[300] rmse=0.021570 lr=0.386465 [1,0]:INFO:root:Epoch[36] Batch[400] Loss[3.081] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[400] rmse=0.021617 lr=0.386396 [1,0]:INFO:root:Epoch[36] Batch[500] Loss[3.158] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[500] rmse=0.021630 lr=0.386328 [1,0]:INFO:root:Epoch[36] Batch[600] Loss[3.006] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[600] rmse=0.021616 lr=0.386259 [1,0]:INFO:root:Epoch[36] Batch[700] Loss[2.765] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[700] rmse=0.021599 lr=0.386190 [1,0]:INFO:root:Epoch[36] Batch[800] Loss[3.239] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[800] rmse=0.021606 lr=0.386120 [1,0]:INFO:root:Epoch[36] Batch[900] Loss[4.161] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[900] rmse=0.021610 lr=0.386051 [1,0]:INFO:root:Epoch[36] Batch[1000] Loss[3.293] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[1000] rmse=0.021631 lr=0.385981 [1,0]:INFO:root:Epoch[36] Batch[1100] Loss[5.489] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[1100] rmse=0.021611 lr=0.385912 [1,0]:INFO:root:Epoch[36] Batch[1200] Loss[3.092] [1,0]:INFO:root:Epoch[36] Rank[0] Batch[1200] rmse=0.021618 lr=0.385842 [1,0]:INFO:root:Epoch[36] Rank[0] Batch[1251] Time cost=398.76 Train-metric=0.021616 [1,0]:INFO:root:Epoch[36] Speed: 3212.56 samples/sec [1,0]:INFO:root:Epoch[37] Batch[100] Loss[4.797] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[100] rmse=0.021470 lr=0.385736 [1,0]:INFO:root:Epoch[37] Batch[200] Loss[3.284] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[200] rmse=0.021568 lr=0.385666 [1,0]:INFO:root:Epoch[37] Batch[300] Loss[3.616] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[300] rmse=0.021561 lr=0.385595 [1,0]:INFO:root:Epoch[37] Batch[400] Loss[3.180] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[400] rmse=0.021581 lr=0.385525 [1,0]:INFO:root:Epoch[37] Batch[500] Loss[2.869] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[500] rmse=0.021610 lr=0.385454 [1,0]:INFO:root:Epoch[37] Batch[600] Loss[5.556] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[600] rmse=0.021579 lr=0.385383 [1,0]:INFO:root:Epoch[37] Batch[700] Loss[4.711] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[700] rmse=0.021568 lr=0.385312 [1,0]:INFO:root:Epoch[37] Batch[800] Loss[3.531] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[800] rmse=0.021571 lr=0.385240 [1,0]:INFO:root:Epoch[37] Batch[900] Loss[4.009] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[900] rmse=0.021541 lr=0.385169 [1,0]:INFO:root:Epoch[37] Batch[1000] Loss[4.705] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[1000] rmse=0.021554 lr=0.385097 [1,0]:INFO:root:Epoch[37] Batch[1100] Loss[3.148] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[1100] rmse=0.021562 lr=0.385025 [1,0]:INFO:root:Epoch[37] Batch[1200] Loss[3.411] [1,0]:INFO:root:Epoch[37] Rank[0] Batch[1200] rmse=0.021562 lr=0.384953 [1,0]:INFO:root:Epoch[37] Rank[0] Batch[1251] Time cost=398.62 Train-metric=0.021567 [1,0]:INFO:root:Epoch[37] Speed: 3213.61 samples/sec [1,0]:INFO:root:Epoch[38] Batch[100] Loss[3.171] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[100] rmse=0.021414 lr=0.384844 [1,0]:INFO:root:Epoch[38] Batch[200] Loss[2.977] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[200] rmse=0.021451 lr=0.384772 [1,0]:INFO:root:Epoch[38] Batch[300] Loss[3.006] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[300] rmse=0.021467 lr=0.384699 [1,0]:INFO:root:Epoch[38] Batch[400] Loss[5.053] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[400] rmse=0.021507 lr=0.384627 [1,0]:INFO:root:Epoch[38] Batch[500] Loss[3.713] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[500] rmse=0.021541 lr=0.384554 [1,0]:INFO:root:Epoch[38] Batch[600] Loss[3.996] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[600] rmse=0.021569 lr=0.384481 [1,0]:INFO:root:Epoch[38] Batch[700] Loss[4.842] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[700] rmse=0.021568 lr=0.384407 [1,0]:INFO:root:Epoch[38] Batch[800] Loss[2.947] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[800] rmse=0.021574 lr=0.384334 [1,0]:INFO:root:Epoch[38] Batch[900] Loss[3.508] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[900] rmse=0.021572 lr=0.384260 [1,0]:INFO:root:Epoch[38] Batch[1000] Loss[3.033] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[1000] rmse=0.021563 lr=0.384187 [1,0]:INFO:root:Epoch[38] Batch[1100] Loss[2.931] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[1100] rmse=0.021568 lr=0.384113 [1,0]:INFO:root:Epoch[38] Batch[1200] Loss[3.100] [1,0]:INFO:root:Epoch[38] Rank[0] Batch[1200] rmse=0.021598 lr=0.384039 [1,0]:INFO:root:Epoch[38] Rank[0] Batch[1251] Time cost=399.39 Train-metric=0.021599 [1,0]:INFO:root:Epoch[38] Speed: 3207.46 samples/sec [1,0]:INFO:root:Epoch[39] Batch[100] Loss[3.038] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[100] rmse=0.021666 lr=0.383927 [1,0]:INFO:root:Epoch[39] Batch[200] Loss[5.365] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[200] rmse=0.021607 lr=0.383852 [1,0]:INFO:root:Epoch[39] Batch[300] Loss[5.126] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[300] rmse=0.021585 lr=0.383778 [1,0]:INFO:root:Epoch[39] Batch[400] Loss[4.682] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[400] rmse=0.021574 lr=0.383703 [1,0]:INFO:root:Epoch[39] Batch[500] Loss[2.973] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[500] rmse=0.021565 lr=0.383628 [1,0]:INFO:root:Epoch[39] Batch[600] Loss[2.814] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[600] rmse=0.021541 lr=0.383553 [1,0]:INFO:root:Epoch[39] Batch[700] Loss[4.271] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[700] rmse=0.021570 lr=0.383477 [1,0]:INFO:root:Epoch[39] Batch[800] Loss[3.317] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[800] rmse=0.021594 lr=0.383402 [1,0]:INFO:root:Epoch[39] Batch[900] Loss[3.161] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[900] rmse=0.021617 lr=0.383326 [1,0]:INFO:root:Epoch[39] Batch[1000] Loss[3.235] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[1000] rmse=0.021613 lr=0.383251 [1,0]:INFO:root:Epoch[39] Batch[1100] Loss[2.713] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[1100] rmse=0.021617 lr=0.383175 [1,0]:INFO:root:Epoch[39] Batch[1200] Loss[4.568] [1,0]:INFO:root:Epoch[39] Rank[0] Batch[1200] rmse=0.021607 lr=0.383099 [1,0]:INFO:root:Epoch[39] Rank[0] Batch[1251] Time cost=398.50 Train-metric=0.021606 [1,0]:INFO:root:Epoch[39] Speed: 3214.60 samples/sec [1,0]:INFO:root:Epoch[39] Rank[0] Validation-accuracy=0.546820 Validation-top_k_accuracy_5=0.792020 [1,0]:INFO:root:Epoch[40] Batch[100] Loss[3.030] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[100] rmse=0.021646 lr=0.382983 [1,0]:INFO:root:Epoch[40] Batch[200] Loss[3.205] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[200] rmse=0.021604 lr=0.382907 [1,0]:INFO:root:Epoch[40] Batch[300] Loss[4.347] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[300] rmse=0.021652 lr=0.382830 [1,0]:INFO:root:Epoch[40] Batch[400] Loss[3.302] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[400] rmse=0.021608 lr=0.382753 [1,0]:INFO:root:Epoch[40] Batch[500] Loss[5.204] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[500] rmse=0.021590 lr=0.382676 [1,0]:INFO:root:Epoch[40] Batch[600] Loss[3.464] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[600] rmse=0.021593 lr=0.382599 [1,0]:INFO:root:Epoch[40] Batch[700] Loss[2.883] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[700] rmse=0.021577 lr=0.382522 [1,0]:INFO:root:Epoch[40] Batch[800] Loss[3.143] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[800] rmse=0.021576 lr=0.382444 [1,0]:INFO:root:Epoch[40] Batch[900] Loss[3.218] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[900] rmse=0.021579 lr=0.382367 [1,0]:INFO:root:Epoch[40] Batch[1000] Loss[3.611] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[1000] rmse=0.021614 lr=0.382289 [1,0]:INFO:root:Epoch[40] Batch[1100] Loss[3.869] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[1100] rmse=0.021619 lr=0.382211 [1,0]:INFO:root:Epoch[40] Batch[1200] Loss[2.914] [1,0]:INFO:root:Epoch[40] Rank[0] Batch[1200] rmse=0.021609 lr=0.382133 [1,0]:INFO:root:Epoch[40] Rank[0] Batch[1251] Time cost=396.76 Train-metric=0.021618 [1,0]:INFO:root:Epoch[40] Speed: 3228.72 samples/sec [1,0]:INFO:root:Epoch[41] Batch[100] Loss[2.943] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[100] rmse=0.021391 lr=0.382014 [1,0]:INFO:root:Epoch[41] Batch[200] Loss[5.351] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[200] rmse=0.021457 lr=0.381936 [1,0]:INFO:root:Epoch[41] Batch[300] Loss[2.813] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[300] rmse=0.021496 lr=0.381857 [1,0]:INFO:root:Epoch[41] Batch[400] Loss[2.963] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[400] rmse=0.021478 lr=0.381778 [1,0]:INFO:root:Epoch[41] Batch[500] Loss[2.965] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[500] rmse=0.021481 lr=0.381699 [1,0]:INFO:root:Epoch[41] Batch[600] Loss[2.932] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[600] rmse=0.021453 lr=0.381620 [1,0]:INFO:root:Epoch[41] Batch[700] Loss[5.574] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[700] rmse=0.021466 lr=0.381540 [1,0]:INFO:root:Epoch[41] Batch[800] Loss[4.149] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[800] rmse=0.021488 lr=0.381461 [1,0]:INFO:root:Epoch[41] Batch[900] Loss[5.206] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[900] rmse=0.021520 lr=0.381381 [1,0]:INFO:root:Epoch[41] Batch[1000] Loss[5.245] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[1000] rmse=0.021528 lr=0.381301 [1,0]:INFO:root:Epoch[41] Batch[1100] Loss[3.717] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[1100] rmse=0.021550 lr=0.381221 [1,0]:INFO:root:Epoch[41] Batch[1200] Loss[4.106] [1,0]:INFO:root:Epoch[41] Rank[0] Batch[1200] rmse=0.021551 lr=0.381141 [1,0]:INFO:root:Epoch[41] Rank[0] Batch[1251] Time cost=398.49 Train-metric=0.021566 [1,0]:INFO:root:Epoch[41] Speed: 3214.71 samples/sec [1,0]:INFO:root:Epoch[42] Batch[100] Loss[3.573] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[100] rmse=0.021386 lr=0.381020 [1,0]:INFO:root:Epoch[42] Batch[200] Loss[3.900] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[200] rmse=0.021486 lr=0.380939 [1,0]:INFO:root:Epoch[42] Batch[300] Loss[5.588] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[300] rmse=0.021357 lr=0.380858 [1,0]:INFO:root:Epoch[42] Batch[400] Loss[5.327] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[400] rmse=0.021389 lr=0.380777 [1,0]:INFO:root:Epoch[42] Batch[500] Loss[3.437] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[500] rmse=0.021454 lr=0.380696 [1,0]:INFO:root:Epoch[42] Batch[600] Loss[2.931] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[600] rmse=0.021503 lr=0.380615 [1,0]:INFO:root:Epoch[42] Batch[700] Loss[4.870] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[700] rmse=0.021508 lr=0.380533 [1,0]:INFO:root:Epoch[42] Batch[800] Loss[3.482] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[800] rmse=0.021486 lr=0.380452 [1,0]:INFO:root:Epoch[42] Batch[900] Loss[5.467] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[900] rmse=0.021488 lr=0.380370 [1,0]:INFO:root:Epoch[42] Batch[1000] Loss[3.004] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[1000] rmse=0.021493 lr=0.380288 [1,0]:INFO:root:Epoch[42] Batch[1100] Loss[4.107] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[1100] rmse=0.021506 lr=0.380206 [1,0]:INFO:root:Epoch[42] Batch[1200] Loss[3.040] [1,0]:INFO:root:Epoch[42] Rank[0] Batch[1200] rmse=0.021516 lr=0.380124 [1,0]:INFO:root:Epoch[42] Rank[0] Batch[1251] Time cost=398.89 Train-metric=0.021521 [1,0]:INFO:root:Epoch[42] Speed: 3211.48 samples/sec [1,0]:INFO:root:Epoch[43] Batch[100] Loss[2.961] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[100] rmse=0.021440 lr=0.380000 [1,0]:INFO:root:Epoch[43] Batch[200] Loss[4.058] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[200] rmse=0.021408 lr=0.379917 [1,0]:INFO:root:Epoch[43] Batch[300] Loss[3.181] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[300] rmse=0.021437 lr=0.379834 [1,0]:INFO:root:Epoch[43] Batch[400] Loss[3.092] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[400] rmse=0.021387 lr=0.379751 [1,0]:INFO:root:Epoch[43] Batch[500] Loss[5.258] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[500] rmse=0.021352 lr=0.379668 [1,0]:INFO:root:Epoch[43] Batch[600] Loss[3.839] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[600] rmse=0.021354 lr=0.379585 [1,0]:INFO:root:Epoch[43] Batch[700] Loss[2.943] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[700] rmse=0.021367 lr=0.379501 [1,0]:INFO:root:Epoch[43] Batch[800] Loss[3.506] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[800] rmse=0.021400 lr=0.379418 [1,0]:INFO:root:Epoch[43] Batch[900] Loss[3.147] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[900] rmse=0.021414 lr=0.379334 [1,0]:INFO:root:Epoch[43] Batch[1000] Loss[4.874] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[1000] rmse=0.021430 lr=0.379250 [1,0]:INFO:root:Epoch[43] Batch[1100] Loss[3.064] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[1100] rmse=0.021459 lr=0.379166 [1,0]:INFO:root:Epoch[43] Batch[1200] Loss[3.948] [1,0]:INFO:root:Epoch[43] Rank[0] Batch[1200] rmse=0.021452 lr=0.379082 [1,0]:INFO:root:Epoch[43] Rank[0] Batch[1251] Time cost=399.33 Train-metric=0.021457 [1,0]:INFO:root:Epoch[43] Speed: 3207.93 samples/sec [1,0]:INFO:root:Epoch[44] Batch[100] Loss[3.116] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[100] rmse=0.021577 lr=0.378954 [1,0]:INFO:root:Epoch[44] Batch[200] Loss[3.157] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[200] rmse=0.021563 lr=0.378870 [1,0]:INFO:root:Epoch[44] Batch[300] Loss[2.808] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[300] rmse=0.021562 lr=0.378785 [1,0]:INFO:root:Epoch[44] Batch[400] Loss[3.001] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[400] rmse=0.021532 lr=0.378700 [1,0]:INFO:root:Epoch[44] Batch[500] Loss[3.273] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[500] rmse=0.021536 lr=0.378615 [1,0]:INFO:root:Epoch[44] Batch[600] Loss[3.010] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[600] rmse=0.021531 lr=0.378529 [1,0]:INFO:root:Epoch[44] Batch[700] Loss[3.252] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[700] rmse=0.021532 lr=0.378444 [1,0]:INFO:root:Epoch[44] Batch[800] Loss[3.991] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[800] rmse=0.021553 lr=0.378358 [1,0]:INFO:root:Epoch[44] Batch[900] Loss[2.941] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[900] rmse=0.021576 lr=0.378273 [1,0]:INFO:root:Epoch[44] Batch[1000] Loss[3.222] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[1000] rmse=0.021581 lr=0.378187 [1,0]:INFO:root:Epoch[44] Batch[1100] Loss[5.558] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[1100] rmse=0.021566 lr=0.378101 [1,0]:INFO:root:Epoch[44] Batch[1200] Loss[4.737] [1,0]:INFO:root:Epoch[44] Rank[0] Batch[1200] rmse=0.021569 lr=0.378014 [1,0]:INFO:root:Epoch[44] Rank[0] Batch[1251] Time cost=402.95 Train-metric=0.021561 [1,0]:INFO:root:Epoch[44] Speed: 3179.12 samples/sec [1,0]:INFO:root:Epoch[44] Rank[0] Validation-accuracy=0.559860 Validation-top_k_accuracy_5=0.803180 [1,0]:INFO:root:Epoch[45] Batch[100] Loss[5.134] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[100] rmse=0.021405 lr=0.377884 [1,0]:INFO:root:Epoch[45] Batch[200] Loss[3.143] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[200] rmse=0.021526 lr=0.377797 [1,0]:INFO:root:Epoch[45] Batch[300] Loss[3.217] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[300] rmse=0.021417 lr=0.377710 [1,0]:INFO:root:Epoch[45] Batch[400] Loss[2.983] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[400] rmse=0.021445 lr=0.377623 [1,0]:INFO:root:Epoch[45] Batch[500] Loss[5.234] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[500] rmse=0.021445 lr=0.377536 [1,0]:INFO:root:Epoch[45] Batch[600] Loss[4.147] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[600] rmse=0.021464 lr=0.377449 [1,0]:INFO:root:Epoch[45] Batch[700] Loss[3.080] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[700] rmse=0.021471 lr=0.377362 [1,0]:INFO:root:Epoch[45] Batch[800] Loss[5.345] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[800] rmse=0.021463 lr=0.377274 [1,0]:INFO:root:Epoch[45] Batch[900] Loss[3.278] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[900] rmse=0.021493 lr=0.377186 [1,0]:INFO:root:Epoch[45] Batch[1000] Loss[4.636] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[1000] rmse=0.021510 lr=0.377098 [1,0]:INFO:root:Epoch[45] Batch[1100] Loss[3.025] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[1100] rmse=0.021522 lr=0.377010 [1,0]:INFO:root:Epoch[45] Batch[1200] Loss[3.111] [1,0]:INFO:root:Epoch[45] Rank[0] Batch[1200] rmse=0.021530 lr=0.376922 [1,0]:INFO:root:Epoch[45] Rank[0] Batch[1251] Time cost=401.76 Train-metric=0.021539 [1,0]:INFO:root:Epoch[45] Speed: 3188.54 samples/sec [1,0]:INFO:root:Epoch[46] Batch[100] Loss[3.287] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[100] rmse=0.021619 lr=0.376788 [1,0]:INFO:root:Epoch[46] Batch[200] Loss[2.989] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[200] rmse=0.021562 lr=0.376700 [1,0]:INFO:root:Epoch[46] Batch[300] Loss[2.953] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[300] rmse=0.021494 lr=0.376611 [1,0]:INFO:root:Epoch[46] Batch[400] Loss[3.285] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[400] rmse=0.021470 lr=0.376522 [1,0]:INFO:root:Epoch[46] Batch[500] Loss[2.985] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[500] rmse=0.021423 lr=0.376433 [1,0]:INFO:root:Epoch[46] Batch[600] Loss[2.977] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[600] rmse=0.021443 lr=0.376344 [1,0]:INFO:root:Epoch[46] Batch[700] Loss[3.409] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[700] rmse=0.021462 lr=0.376254 [1,0]:INFO:root:Epoch[46] Batch[800] Loss[3.309] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[800] rmse=0.021468 lr=0.376165 [1,0]:INFO:root:Epoch[46] Batch[900] Loss[5.404] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[900] rmse=0.021460 lr=0.376075 [1,0]:INFO:root:Epoch[46] Batch[1000] Loss[3.478] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[1000] rmse=0.021467 lr=0.375985 [1,0]:INFO:root:Epoch[46] Batch[1100] Loss[2.819] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[1100] rmse=0.021467 lr=0.375895 [1,0]:INFO:root:Epoch[46] Batch[1200] Loss[2.872] [1,0]:INFO:root:Epoch[46] Rank[0] Batch[1200] rmse=0.021461 lr=0.375805 [1,0]:INFO:root:Epoch[46] Rank[0] Batch[1251] Time cost=399.89 Train-metric=0.021458 [1,0]:INFO:root:Epoch[46] Speed: 3203.41 samples/sec [1,0]:INFO:root:Epoch[47] Batch[100] Loss[2.889] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[100] rmse=0.021394 lr=0.375668 [1,0]:INFO:root:Epoch[47] Batch[200] Loss[3.033] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[200] rmse=0.021405 lr=0.375578 [1,0]:INFO:root:Epoch[47] Batch[300] Loss[3.407] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[300] rmse=0.021360 lr=0.375487 [1,0]:INFO:root:Epoch[47] Batch[400] Loss[3.249] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[400] rmse=0.021469 lr=0.375396 [1,0]:INFO:root:Epoch[47] Batch[500] Loss[3.583] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[500] rmse=0.021469 lr=0.375305 [1,0]:INFO:root:Epoch[47] Batch[600] Loss[3.115] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[600] rmse=0.021438 lr=0.375214 [1,0]:INFO:root:Epoch[47] Batch[700] Loss[3.313] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[700] rmse=0.021469 lr=0.375122 [1,0]:INFO:root:Epoch[47] Batch[800] Loss[5.394] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[800] rmse=0.021456 lr=0.375031 [1,0]:INFO:root:Epoch[47] Batch[900] Loss[3.361] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[900] rmse=0.021452 lr=0.374939 [1,0]:INFO:root:Epoch[47] Batch[1000] Loss[2.930] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[1000] rmse=0.021473 lr=0.374847 [1,0]:INFO:root:Epoch[47] Batch[1100] Loss[5.359] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[1100] rmse=0.021450 lr=0.374755 [1,0]:INFO:root:Epoch[47] Batch[1200] Loss[3.957] [1,0]:INFO:root:Epoch[47] Rank[0] Batch[1200] rmse=0.021451 lr=0.374663 [1,0]:INFO:root:Epoch[47] Rank[0] Batch[1251] Time cost=400.99 Train-metric=0.021450 [1,0]:INFO:root:Epoch[47] Speed: 3194.64 samples/sec [1,0]:INFO:root:Epoch[48] Batch[100] Loss[5.597] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[100] rmse=0.021429 lr=0.374523 [1,0]:INFO:root:Epoch[48] Batch[200] Loss[3.221] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[200] rmse=0.021416 lr=0.374431 [1,0]:INFO:root:Epoch[48] Batch[300] Loss[3.159] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[300] rmse=0.021405 lr=0.374338 [1,0]:INFO:root:Epoch[48] Batch[400] Loss[5.407] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[400] rmse=0.021393 lr=0.374245 [1,0]:INFO:root:Epoch[48] Batch[500] Loss[3.365] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[500] rmse=0.021391 lr=0.374152 [1,0]:INFO:root:Epoch[48] Batch[600] Loss[3.179] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[600] rmse=0.021397 lr=0.374059 [1,0]:INFO:root:Epoch[48] Batch[700] Loss[3.207] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[700] rmse=0.021427 lr=0.373966 [1,0]:INFO:root:Epoch[48] Batch[800] Loss[3.164] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[800] rmse=0.021408 lr=0.373872 [1,0]:INFO:root:Epoch[48] Batch[900] Loss[3.408] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[900] rmse=0.021408 lr=0.373779 [1,0]:INFO:root:Epoch[48] Batch[1000] Loss[3.332] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[1000] rmse=0.021434 lr=0.373685 [1,0]:INFO:root:Epoch[48] Batch[1100] Loss[3.193] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[1100] rmse=0.021445 lr=0.373591 [1,0]:INFO:root:Epoch[48] Batch[1200] Loss[2.761] [1,0]:INFO:root:Epoch[48] Rank[0] Batch[1200] rmse=0.021445 lr=0.373497 [1,0]:INFO:root:Epoch[48] Rank[0] Batch[1251] Time cost=399.09 Train-metric=0.021440 [1,0]:INFO:root:Epoch[48] Speed: 3209.86 samples/sec [1,0]:INFO:root:Epoch[49] Batch[100] Loss[2.995] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[100] rmse=0.021526 lr=0.373354 [1,0]:INFO:root:Epoch[49] Batch[200] Loss[3.143] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[200] rmse=0.021344 lr=0.373260 [1,0]:INFO:root:Epoch[49] Batch[300] Loss[3.390] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[300] rmse=0.021314 lr=0.373165 [1,0]:INFO:root:Epoch[49] Batch[400] Loss[3.693] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[400] rmse=0.021277 lr=0.373070 [1,0]:INFO:root:Epoch[49] Batch[500] Loss[2.969] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[500] rmse=0.021337 lr=0.372975 [1,0]:INFO:root:Epoch[49] Batch[600] Loss[3.016] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[600] rmse=0.021336 lr=0.372880 [1,0]:INFO:root:Epoch[49] Batch[700] Loss[4.827] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[700] rmse=0.021363 lr=0.372785 [1,0]:INFO:root:Epoch[49] Batch[800] Loss[3.006] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[800] rmse=0.021361 lr=0.372689 [1,0]:INFO:root:Epoch[49] Batch[900] Loss[3.194] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[900] rmse=0.021399 lr=0.372594 [1,0]:INFO:root:Epoch[49] Batch[1000] Loss[4.188] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[1000] rmse=0.021424 lr=0.372498 [1,0]:INFO:root:Epoch[49] Batch[1100] Loss[3.034] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[1100] rmse=0.021424 lr=0.372402 [1,0]:INFO:root:Epoch[49] Batch[1200] Loss[2.904] [1,0]:INFO:root:Epoch[49] Rank[0] Batch[1200] rmse=0.021448 lr=0.372306 [1,0]:INFO:root:Epoch[49] Rank[0] Batch[1251] Time cost=398.81 Train-metric=0.021458 [1,0]:INFO:root:Epoch[49] Speed: 3212.13 samples/sec [1,0]:INFO:root:Epoch[49] Rank[0] Validation-accuracy=0.559560 Validation-top_k_accuracy_5=0.803140 [1,0]:INFO:root:Epoch[50] Batch[100] Loss[4.187] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[100] rmse=0.021244 lr=0.372161 [1,0]:INFO:root:Epoch[50] Batch[200] Loss[2.888] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[200] rmse=0.021419 lr=0.372064 [1,0]:INFO:root:Epoch[50] Batch[300] Loss[3.508] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[300] rmse=0.021350 lr=0.371967 [1,0]:INFO:root:Epoch[50] Batch[400] Loss[2.932] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[400] rmse=0.021356 lr=0.371871 [1,0]:INFO:root:Epoch[50] Batch[500] Loss[2.681] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[500] rmse=0.021370 lr=0.371774 [1,0]:INFO:root:Epoch[50] Batch[600] Loss[4.528] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[600] rmse=0.021372 lr=0.371677 [1,0]:INFO:root:Epoch[50] Batch[700] Loss[2.739] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[700] rmse=0.021392 lr=0.371579 [1,0]:INFO:root:Epoch[50] Batch[800] Loss[3.713] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[800] rmse=0.021392 lr=0.371482 [1,0]:INFO:root:Epoch[50] Batch[900] Loss[4.326] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[900] rmse=0.021384 lr=0.371385 [1,0]:INFO:root:Epoch[50] Batch[1000] Loss[3.005] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[1000] rmse=0.021413 lr=0.371287 [1,0]:INFO:root:Epoch[50] Batch[1100] Loss[3.070] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[1100] rmse=0.021432 lr=0.371189 [1,0]:INFO:root:Epoch[50] Batch[1200] Loss[3.900] [1,0]:INFO:root:Epoch[50] Rank[0] Batch[1200] rmse=0.021465 lr=0.371091 [1,0]:INFO:root:Epoch[50] Rank[0] Batch[1251] Time cost=398.02 Train-metric=0.021468 [1,0]:INFO:root:Epoch[50] Speed: 3218.52 samples/sec [1,0]:INFO:root:Epoch[51] Batch[100] Loss[2.760] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[100] rmse=0.021453 lr=0.370943 [1,0]:INFO:root:Epoch[51] Batch[200] Loss[5.153] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[200] rmse=0.021375 lr=0.370844 [1,0]:INFO:root:Epoch[51] Batch[300] Loss[3.499] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[300] rmse=0.021428 lr=0.370746 [1,0]:INFO:root:Epoch[51] Batch[400] Loss[2.961] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[400] rmse=0.021423 lr=0.370647 [1,0]:INFO:root:Epoch[51] Batch[500] Loss[4.979] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[500] rmse=0.021422 lr=0.370548 [1,0]:INFO:root:Epoch[51] Batch[600] Loss[3.580] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[600] rmse=0.021444 lr=0.370449 [1,0]:INFO:root:Epoch[51] Batch[700] Loss[2.961] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[700] rmse=0.021472 lr=0.370350 [1,0]:INFO:root:Epoch[51] Batch[800] Loss[2.974] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[800] rmse=0.021463 lr=0.370251 [1,0]:INFO:root:Epoch[51] Batch[900] Loss[2.937] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[900] rmse=0.021477 lr=0.370151 [1,0]:INFO:root:Epoch[51] Batch[1000] Loss[3.612] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[1000] rmse=0.021494 lr=0.370052 [1,0]:INFO:root:Epoch[51] Batch[1100] Loss[2.980] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[1100] rmse=0.021489 lr=0.369952 [1,0]:INFO:root:Epoch[51] Batch[1200] Loss[4.338] [1,0]:INFO:root:Epoch[51] Rank[0] Batch[1200] rmse=0.021486 lr=0.369852 [1,0]:INFO:root:Epoch[51] Rank[0] Batch[1251] Time cost=399.30 Train-metric=0.021483 [1,0]:INFO:root:Epoch[51] Speed: 3208.14 samples/sec [1,0]:INFO:root:Epoch[52] Batch[100] Loss[5.323] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[100] rmse=0.021240 lr=0.369701 [1,0]:INFO:root:Epoch[52] Batch[200] Loss[3.812] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[200] rmse=0.021346 lr=0.369601 [1,0]:INFO:root:Epoch[52] Batch[300] Loss[2.935] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[300] rmse=0.021372 lr=0.369500 [1,0]:INFO:root:Epoch[52] Batch[400] Loss[3.090] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[400] rmse=0.021400 lr=0.369400 [1,0]:INFO:root:Epoch[52] Batch[500] Loss[4.143] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[500] rmse=0.021414 lr=0.369299 [1,0]:INFO:root:Epoch[52] Batch[600] Loss[2.985] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[600] rmse=0.021437 lr=0.369198 [1,0]:INFO:root:Epoch[52] Batch[700] Loss[3.073] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[700] rmse=0.021437 lr=0.369097 [1,0]:INFO:root:Epoch[52] Batch[800] Loss[2.866] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[800] rmse=0.021410 lr=0.368996 [1,0]:INFO:root:Epoch[52] Batch[900] Loss[3.232] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[900] rmse=0.021409 lr=0.368894 [1,0]:INFO:root:Epoch[52] Batch[1000] Loss[2.910] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[1000] rmse=0.021395 lr=0.368793 [1,0]:INFO:root:Epoch[52] Batch[1100] Loss[3.109] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[1100] rmse=0.021408 lr=0.368691 [1,0]:INFO:root:Epoch[52] Batch[1200] Loss[3.235] [1,0]:INFO:root:Epoch[52] Rank[0] Batch[1200] rmse=0.021415 lr=0.368589 [1,0]:INFO:root:Epoch[52] Rank[0] Batch[1251] Time cost=400.41 Train-metric=0.021414 [1,0]:INFO:root:Epoch[52] Speed: 3199.31 samples/sec [1,0]:INFO:root:Epoch[53] Batch[100] Loss[3.150] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[100] rmse=0.021228 lr=0.368435 [1,0]:INFO:root:Epoch[53] Batch[200] Loss[3.012] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[200] rmse=0.021246 lr=0.368333 [1,0]:INFO:root:Epoch[53] Batch[300] Loss[2.943] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[300] rmse=0.021248 lr=0.368231 [1,0]:INFO:root:Epoch[53] Batch[400] Loss[2.799] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[400] rmse=0.021259 lr=0.368128 [1,0]:INFO:root:Epoch[53] Batch[500] Loss[2.981] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[500] rmse=0.021322 lr=0.368026 [1,0]:INFO:root:Epoch[53] Batch[600] Loss[4.510] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[600] rmse=0.021356 lr=0.367923 [1,0]:INFO:root:Epoch[53] Batch[700] Loss[2.966] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[700] rmse=0.021371 lr=0.367820 [1,0]:INFO:root:Epoch[53] Batch[800] Loss[2.694] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[800] rmse=0.021404 lr=0.367717 [1,0]:INFO:root:Epoch[53] Batch[900] Loss[3.332] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[900] rmse=0.021415 lr=0.367614 [1,0]:INFO:root:Epoch[53] Batch[1000] Loss[5.288] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[1000] rmse=0.021427 lr=0.367510 [1,0]:INFO:root:Epoch[53] Batch[1100] Loss[4.044] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[1100] rmse=0.021426 lr=0.367407 [1,0]:INFO:root:Epoch[53] Batch[1200] Loss[3.047] [1,0]:INFO:root:Epoch[53] Rank[0] Batch[1200] rmse=0.021431 lr=0.367303 [1,0]:INFO:root:Epoch[53] Rank[0] Batch[1251] Time cost=399.45 Train-metric=0.021420 [1,0]:INFO:root:Epoch[53] Speed: 3206.94 samples/sec [1,0]:INFO:root:Epoch[54] Batch[100] Loss[3.494] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[100] rmse=0.021105 lr=0.367146 [1,0]:INFO:root:Epoch[54] Batch[200] Loss[3.101] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[200] rmse=0.021275 lr=0.367042 [1,0]:INFO:root:Epoch[54] Batch[300] Loss[3.000] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[300] rmse=0.021321 lr=0.366938 [1,0]:INFO:root:Epoch[54] Batch[400] Loss[2.786] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[400] rmse=0.021318 lr=0.366834 [1,0]:INFO:root:Epoch[54] Batch[500] Loss[2.781] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[500] rmse=0.021368 lr=0.366729 [1,0]:INFO:root:Epoch[54] Batch[600] Loss[2.730] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[600] rmse=0.021381 lr=0.366624 [1,0]:INFO:root:Epoch[54] Batch[700] Loss[3.842] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[700] rmse=0.021409 lr=0.366520 [1,0]:INFO:root:Epoch[54] Batch[800] Loss[2.861] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[800] rmse=0.021406 lr=0.366415 [1,0]:INFO:root:Epoch[54] Batch[900] Loss[3.286] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[900] rmse=0.021408 lr=0.366309 [1,0]:INFO:root:Epoch[54] Batch[1000] Loss[3.896] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[1000] rmse=0.021407 lr=0.366204 [1,0]:INFO:root:Epoch[54] Batch[1100] Loss[4.758] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[1100] rmse=0.021405 lr=0.366099 [1,0]:INFO:root:Epoch[54] Batch[1200] Loss[4.291] [1,0]:INFO:root:Epoch[54] Rank[0] Batch[1200] rmse=0.021411 lr=0.365993 [1,0]:INFO:root:Epoch[54] Rank[0] Batch[1251] Time cost=399.09 Train-metric=0.021410 [1,0]:INFO:root:Epoch[54] Speed: 3209.84 samples/sec [1,0]:INFO:root:Epoch[54] Rank[0] Validation-accuracy=0.574500 Validation-top_k_accuracy_5=0.817740 [1,0]:INFO:root:Epoch[55] Batch[100] Loss[3.326] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[100] rmse=0.021402 lr=0.365834 [1,0]:INFO:root:Epoch[55] Batch[200] Loss[2.659] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[200] rmse=0.021429 lr=0.365728 [1,0]:INFO:root:Epoch[55] Batch[300] Loss[3.375] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[300] rmse=0.021426 lr=0.365621 [1,0]:INFO:root:Epoch[55] Batch[400] Loss[3.161] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[400] rmse=0.021417 lr=0.365515 [1,0]:INFO:root:Epoch[55] Batch[500] Loss[3.356] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[500] rmse=0.021462 lr=0.365409 [1,0]:INFO:root:Epoch[55] Batch[600] Loss[2.820] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[600] rmse=0.021462 lr=0.365302 [1,0]:INFO:root:Epoch[55] Batch[700] Loss[3.510] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[700] rmse=0.021444 lr=0.365196 [1,0]:INFO:root:Epoch[55] Batch[800] Loss[3.459] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[800] rmse=0.021457 lr=0.365089 [1,0]:INFO:root:Epoch[55] Batch[900] Loss[3.194] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[900] rmse=0.021428 lr=0.364982 [1,0]:INFO:root:Epoch[55] Batch[1000] Loss[5.408] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[1000] rmse=0.021445 lr=0.364875 [1,0]:INFO:root:Epoch[55] Batch[1100] Loss[4.200] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[1100] rmse=0.021459 lr=0.364767 [1,0]:INFO:root:Epoch[55] Batch[1200] Loss[3.078] [1,0]:INFO:root:Epoch[55] Rank[0] Batch[1200] rmse=0.021449 lr=0.364660 [1,0]:INFO:root:Epoch[55] Rank[0] Batch[1251] Time cost=398.13 Train-metric=0.021463 [1,0]:INFO:root:Epoch[55] Speed: 3217.59 samples/sec [1,0]:INFO:root:Epoch[56] Batch[100] Loss[3.431] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[100] rmse=0.021501 lr=0.364498 [1,0]:INFO:root:Epoch[56] Batch[200] Loss[4.922] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[200] rmse=0.021337 lr=0.364390 [1,0]:INFO:root:Epoch[56] Batch[300] Loss[4.310] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[300] rmse=0.021301 lr=0.364282 [1,0]:INFO:root:Epoch[56] Batch[400] Loss[3.055] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[400] rmse=0.021360 lr=0.364174 [1,0]:INFO:root:Epoch[56] Batch[500] Loss[3.255] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[500] rmse=0.021342 lr=0.364066 [1,0]:INFO:root:Epoch[56] Batch[600] Loss[3.059] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[600] rmse=0.021350 lr=0.363957 [1,0]:INFO:root:Epoch[56] Batch[700] Loss[2.831] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[700] rmse=0.021382 lr=0.363849 [1,0]:INFO:root:Epoch[56] Batch[800] Loss[3.433] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[800] rmse=0.021390 lr=0.363740 [1,0]:INFO:root:Epoch[56] Batch[900] Loss[3.015] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[900] rmse=0.021403 lr=0.363631 [1,0]:INFO:root:Epoch[56] Batch[1000] Loss[2.951] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[1000] rmse=0.021408 lr=0.363522 [1,0]:INFO:root:Epoch[56] Batch[1100] Loss[3.184] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[1100] rmse=0.021423 lr=0.363413 [1,0]:INFO:root:Epoch[56] Batch[1200] Loss[5.117] [1,0]:INFO:root:Epoch[56] Rank[0] Batch[1200] rmse=0.021422 lr=0.363304 [1,0]:INFO:root:Epoch[56] Rank[0] Batch[1251] Time cost=399.77 Train-metric=0.021427 [1,0]:INFO:root:Epoch[56] Speed: 3204.37 samples/sec [1,0]:INFO:root:Epoch[57] Batch[100] Loss[3.545] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[100] rmse=0.021303 lr=0.363139 [1,0]:INFO:root:Epoch[57] Batch[200] Loss[4.619] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[200] rmse=0.021364 lr=0.363029 [1,0]:INFO:root:Epoch[57] Batch[300] Loss[3.832] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[300] rmse=0.021396 lr=0.362919 [1,0]:INFO:root:Epoch[57] Batch[400] Loss[3.274] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[400] rmse=0.021432 lr=0.362809 [1,0]:INFO:root:Epoch[57] Batch[500] Loss[3.223] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[500] rmse=0.021478 lr=0.362699 [1,0]:INFO:root:Epoch[57] Batch[600] Loss[5.003] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[600] rmse=0.021448 lr=0.362589 [1,0]:INFO:root:Epoch[57] Batch[700] Loss[3.111] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[700] rmse=0.021464 lr=0.362479 [1,0]:INFO:root:Epoch[57] Batch[800] Loss[2.980] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[800] rmse=0.021464 lr=0.362368 [1,0]:INFO:root:Epoch[57] Batch[900] Loss[3.786] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[900] rmse=0.021463 lr=0.362257 [1,0]:INFO:root:Epoch[57] Batch[1000] Loss[3.056] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[1000] rmse=0.021461 lr=0.362147 [1,0]:INFO:root:Epoch[57] Batch[1100] Loss[3.055] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[1100] rmse=0.021457 lr=0.362036 [1,0]:INFO:root:Epoch[57] Batch[1200] Loss[5.030] [1,0]:INFO:root:Epoch[57] Rank[0] Batch[1200] rmse=0.021426 lr=0.361925 [1,0]:INFO:root:Epoch[57] Rank[0] Batch[1251] Time cost=398.82 Train-metric=0.021414 [1,0]:INFO:root:Epoch[57] Speed: 3212.04 samples/sec [1,0]:INFO:root:Epoch[58] Batch[100] Loss[2.821] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[100] rmse=0.021200 lr=0.361757 [1,0]:INFO:root:Epoch[58] Batch[200] Loss[2.828] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[200] rmse=0.021227 lr=0.361645 [1,0]:INFO:root:Epoch[58] Batch[300] Loss[3.512] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[300] rmse=0.021245 lr=0.361534 [1,0]:INFO:root:Epoch[58] Batch[400] Loss[3.224] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[400] rmse=0.021320 lr=0.361422 [1,0]:INFO:root:Epoch[58] Batch[500] Loss[3.170] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[500] rmse=0.021330 lr=0.361310 [1,0]:INFO:root:Epoch[58] Batch[600] Loss[3.089] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[600] rmse=0.021303 lr=0.361198 [1,0]:INFO:root:Epoch[58] Batch[700] Loss[3.276] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[700] rmse=0.021340 lr=0.361086 [1,0]:INFO:root:Epoch[58] Batch[800] Loss[4.920] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[800] rmse=0.021333 lr=0.360973 [1,0]:INFO:root:Epoch[58] Batch[900] Loss[3.149] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[900] rmse=0.021364 lr=0.360861 [1,0]:INFO:root:Epoch[58] Batch[1000] Loss[2.859] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[1000] rmse=0.021354 lr=0.360748 [1,0]:INFO:root:Epoch[58] Batch[1100] Loss[2.819] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[1100] rmse=0.021372 lr=0.360636 [1,0]:INFO:root:Epoch[58] Batch[1200] Loss[3.076] [1,0]:INFO:root:Epoch[58] Rank[0] Batch[1200] rmse=0.021390 lr=0.360523 [1,0]:INFO:root:Epoch[58] Rank[0] Batch[1251] Time cost=400.37 Train-metric=0.021412 [1,0]:INFO:root:Epoch[58] Speed: 3199.58 samples/sec [1,0]:INFO:root:Epoch[59] Batch[100] Loss[2.911] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[100] rmse=0.021265 lr=0.360352 [1,0]:INFO:root:Epoch[59] Batch[200] Loss[4.113] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[200] rmse=0.021257 lr=0.360239 [1,0]:INFO:root:Epoch[59] Batch[300] Loss[3.808] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[300] rmse=0.021247 lr=0.360125 [1,0]:INFO:root:Epoch[59] Batch[400] Loss[3.013] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[400] rmse=0.021198 lr=0.360012 [1,0]:INFO:root:Epoch[59] Batch[500] Loss[5.018] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[500] rmse=0.021209 lr=0.359898 [1,0]:INFO:root:Epoch[59] Batch[600] Loss[3.308] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[600] rmse=0.021236 lr=0.359784 [1,0]:INFO:root:Epoch[59] Batch[700] Loss[2.795] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[700] rmse=0.021277 lr=0.359670 [1,0]:INFO:root:Epoch[59] Batch[800] Loss[4.162] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[800] rmse=0.021282 lr=0.359556 [1,0]:INFO:root:Epoch[59] Batch[900] Loss[3.241] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[900] rmse=0.021304 lr=0.359442 [1,0]:INFO:root:Epoch[59] Batch[1000] Loss[4.570] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[1000] rmse=0.021317 lr=0.359328 [1,0]:INFO:root:Epoch[59] Batch[1100] Loss[3.232] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[1100] rmse=0.021333 lr=0.359213 [1,0]:INFO:root:Epoch[59] Batch[1200] Loss[4.192] [1,0]:INFO:root:Epoch[59] Rank[0] Batch[1200] rmse=0.021332 lr=0.359098 [1,0]:INFO:root:Epoch[59] Rank[0] Batch[1251] Time cost=399.21 Train-metric=0.021328 [1,0]:INFO:root:Epoch[59] Speed: 3208.92 samples/sec [1,0]:INFO:root:Epoch[59] Rank[0] Validation-accuracy=0.556400 Validation-top_k_accuracy_5=0.801860 [1,0]:INFO:root:Epoch[60] Batch[100] Loss[3.155] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[100] rmse=0.021360 lr=0.358925 [1,0]:INFO:root:Epoch[60] Batch[200] Loss[4.769] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[200] rmse=0.021251 lr=0.358810 [1,0]:INFO:root:Epoch[60] Batch[300] Loss[3.065] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[300] rmse=0.021256 lr=0.358695 [1,0]:INFO:root:Epoch[60] Batch[400] Loss[5.331] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[400] rmse=0.021224 lr=0.358579 [1,0]:INFO:root:Epoch[60] Batch[500] Loss[5.443] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[500] rmse=0.021249 lr=0.358464 [1,0]:INFO:root:Epoch[60] Batch[600] Loss[3.131] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[600] rmse=0.021286 lr=0.358348 [1,0]:INFO:root:Epoch[60] Batch[700] Loss[3.459] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[700] rmse=0.021319 lr=0.358233 [1,0]:INFO:root:Epoch[60] Batch[800] Loss[3.142] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[800] rmse=0.021335 lr=0.358117 [1,0]:INFO:root:Epoch[60] Batch[900] Loss[3.263] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[900] rmse=0.021330 lr=0.358001 [1,0]:INFO:root:Epoch[60] Batch[1000] Loss[3.634] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[1000] rmse=0.021326 lr=0.357884 [1,0]:INFO:root:Epoch[60] Batch[1100] Loss[3.191] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[1100] rmse=0.021351 lr=0.357768 [1,0]:INFO:root:Epoch[60] Batch[1200] Loss[3.449] [1,0]:INFO:root:Epoch[60] Rank[0] Batch[1200] rmse=0.021359 lr=0.357652 [1,0]:INFO:root:Epoch[60] Rank[0] Batch[1251] Time cost=399.37 Train-metric=0.021361 [1,0]:INFO:root:Epoch[60] Speed: 3207.63 samples/sec [1,0]:INFO:root:Epoch[61] Batch[100] Loss[3.552] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[100] rmse=0.021141 lr=0.357475 [1,0]:INFO:root:Epoch[61] Batch[200] Loss[3.986] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[200] rmse=0.021239 lr=0.357359 [1,0]:INFO:root:Epoch[61] Batch[300] Loss[3.646] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[300] rmse=0.021225 lr=0.357242 [1,0]:INFO:root:Epoch[61] Batch[400] Loss[3.115] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[400] rmse=0.021240 lr=0.357125 [1,0]:INFO:root:Epoch[61] Batch[500] Loss[4.354] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[500] rmse=0.021254 lr=0.357007 [1,0]:INFO:root:Epoch[61] Batch[600] Loss[3.433] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[600] rmse=0.021308 lr=0.356890 [1,0]:INFO:root:Epoch[61] Batch[700] Loss[2.916] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[700] rmse=0.021348 lr=0.356772 [1,0]:INFO:root:Epoch[61] Batch[800] Loss[4.002] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[800] rmse=0.021361 lr=0.356655 [1,0]:INFO:root:Epoch[61] Batch[900] Loss[2.850] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[900] rmse=0.021375 lr=0.356537 [1,0]:INFO:root:Epoch[61] Batch[1000] Loss[3.160] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[1000] rmse=0.021372 lr=0.356419 [1,0]:INFO:root:Epoch[61] Batch[1100] Loss[3.405] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[1100] rmse=0.021357 lr=0.356301 [1,0]:INFO:root:Epoch[61] Batch[1200] Loss[3.861] [1,0]:INFO:root:Epoch[61] Rank[0] Batch[1200] rmse=0.021380 lr=0.356183 [1,0]:INFO:root:Epoch[61] Rank[0] Batch[1251] Time cost=399.14 Train-metric=0.021382 [1,0]:INFO:root:Epoch[61] Speed: 3209.44 samples/sec [1,0]:INFO:root:Epoch[62] Batch[100] Loss[2.955] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[100] rmse=0.021346 lr=0.356004 [1,0]:INFO:root:Epoch[62] Batch[200] Loss[3.860] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[200] rmse=0.021232 lr=0.355885 [1,0]:INFO:root:Epoch[62] Batch[300] Loss[4.532] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[300] rmse=0.021270 lr=0.355767 [1,0]:INFO:root:Epoch[62] Batch[400] Loss[3.216] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[400] rmse=0.021282 lr=0.355648 [1,0]:INFO:root:Epoch[62] Batch[500] Loss[5.358] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[500] rmse=0.021279 lr=0.355529 [1,0]:INFO:root:Epoch[62] Batch[600] Loss[3.165] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[600] rmse=0.021279 lr=0.355410 [1,0]:INFO:root:Epoch[62] Batch[700] Loss[3.186] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[700] rmse=0.021305 lr=0.355290 [1,0]:INFO:root:Epoch[62] Batch[800] Loss[3.294] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[800] rmse=0.021346 lr=0.355171 [1,0]:INFO:root:Epoch[62] Batch[900] Loss[4.868] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[900] rmse=0.021350 lr=0.355051 [1,0]:INFO:root:Epoch[62] Batch[1000] Loss[3.553] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[1000] rmse=0.021311 lr=0.354932 [1,0]:INFO:root:Epoch[62] Batch[1100] Loss[3.015] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[1100] rmse=0.021329 lr=0.354812 [1,0]:INFO:root:Epoch[62] Batch[1200] Loss[2.970] [1,0]:INFO:root:Epoch[62] Rank[0] Batch[1200] rmse=0.021333 lr=0.354692 [1,0]:INFO:root:Epoch[62] Rank[0] Batch[1251] Time cost=399.11 Train-metric=0.021349 [1,0]:INFO:root:Epoch[62] Speed: 3209.71 samples/sec [1,0]:INFO:root:Epoch[63] Batch[100] Loss[2.987] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[100] rmse=0.021450 lr=0.354511 [1,0]:INFO:root:Epoch[63] Batch[200] Loss[3.088] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[200] rmse=0.021398 lr=0.354390 [1,0]:INFO:root:Epoch[63] Batch[300] Loss[3.077] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[300] rmse=0.021382 lr=0.354270 [1,0]:INFO:root:Epoch[63] Batch[400] Loss[3.089] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[400] rmse=0.021372 lr=0.354149 [1,0]:INFO:root:Epoch[63] Batch[500] Loss[3.293] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[500] rmse=0.021340 lr=0.354029 [1,0]:INFO:root:Epoch[63] Batch[600] Loss[5.331] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[600] rmse=0.021351 lr=0.353908 [1,0]:INFO:root:Epoch[63] Batch[700] Loss[4.569] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[700] rmse=0.021356 lr=0.353787 [1,0]:INFO:root:Epoch[63] Batch[800] Loss[2.890] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[800] rmse=0.021367 lr=0.353665 [1,0]:INFO:root:Epoch[63] Batch[900] Loss[3.320] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[900] rmse=0.021340 lr=0.353544 [1,0]:INFO:root:Epoch[63] Batch[1000] Loss[5.166] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[1000] rmse=0.021334 lr=0.353423 [1,0]:INFO:root:Epoch[63] Batch[1100] Loss[2.758] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[1100] rmse=0.021327 lr=0.353301 [1,0]:INFO:root:Epoch[63] Batch[1200] Loss[4.872] [1,0]:INFO:root:Epoch[63] Rank[0] Batch[1200] rmse=0.021337 lr=0.353180 [1,0]:INFO:root:Epoch[63] Rank[0] Batch[1251] Time cost=398.61 Train-metric=0.021334 [1,0]:INFO:root:Epoch[63] Speed: 3213.71 samples/sec [1,0]:INFO:root:Epoch[64] Batch[100] Loss[2.982] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[100] rmse=0.021226 lr=0.352996 [1,0]:INFO:root:Epoch[64] Batch[200] Loss[5.034] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[200] rmse=0.021215 lr=0.352873 [1,0]:INFO:root:Epoch[64] Batch[300] Loss[3.123] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[300] rmse=0.021238 lr=0.352751 [1,0]:INFO:root:Epoch[64] Batch[400] Loss[5.342] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[400] rmse=0.021219 lr=0.352629 [1,0]:INFO:root:Epoch[64] Batch[500] Loss[4.914] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[500] rmse=0.021255 lr=0.352507 [1,0]:INFO:root:Epoch[64] Batch[600] Loss[2.806] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[600] rmse=0.021281 lr=0.352384 [1,0]:INFO:root:Epoch[64] Batch[700] Loss[2.832] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[700] rmse=0.021264 lr=0.352261 [1,0]:INFO:root:Epoch[64] Batch[800] Loss[4.591] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[800] rmse=0.021287 lr=0.352138 [1,0]:INFO:root:Epoch[64] Batch[900] Loss[3.012] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[900] rmse=0.021315 lr=0.352015 [1,0]:INFO:root:Epoch[64] Batch[1000] Loss[3.834] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[1000] rmse=0.021329 lr=0.351892 [1,0]:INFO:root:Epoch[64] Batch[1100] Loss[4.093] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[1100] rmse=0.021333 lr=0.351769 [1,0]:INFO:root:Epoch[64] Batch[1200] Loss[5.034] [1,0]:INFO:root:Epoch[64] Rank[0] Batch[1200] rmse=0.021346 lr=0.351646 [1,0]:INFO:root:Epoch[64] Rank[0] Batch[1251] Time cost=398.93 Train-metric=0.021346 [1,0]:INFO:root:Epoch[64] Speed: 3211.17 samples/sec [1,0]:INFO:root:Epoch[64] Rank[0] Validation-accuracy=0.587840 Validation-top_k_accuracy_5=0.823020 [1,0]:INFO:root:Epoch[65] Batch[100] Loss[3.222] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[100] rmse=0.021190 lr=0.351459 [1,0]:INFO:root:Epoch[65] Batch[200] Loss[2.901] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[200] rmse=0.021161 lr=0.351335 [1,0]:INFO:root:Epoch[65] Batch[300] Loss[4.694] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[300] rmse=0.021233 lr=0.351211 [1,0]:INFO:root:Epoch[65] Batch[400] Loss[3.470] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[400] rmse=0.021277 lr=0.351087 [1,0]:INFO:root:Epoch[65] Batch[500] Loss[4.302] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[500] rmse=0.021274 lr=0.350963 [1,0]:INFO:root:Epoch[65] Batch[600] Loss[3.953] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[600] rmse=0.021304 lr=0.350839 [1,0]:INFO:root:Epoch[65] Batch[700] Loss[2.940] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[700] rmse=0.021320 lr=0.350714 [1,0]:INFO:root:Epoch[65] Batch[800] Loss[3.611] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[800] rmse=0.021320 lr=0.350590 [1,0]:INFO:root:Epoch[65] Batch[900] Loss[4.573] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[900] rmse=0.021313 lr=0.350465 [1,0]:INFO:root:Epoch[65] Batch[1000] Loss[4.632] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[1000] rmse=0.021309 lr=0.350340 [1,0]:INFO:root:Epoch[65] Batch[1100] Loss[5.208] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[1100] rmse=0.021330 lr=0.350215 [1,0]:INFO:root:Epoch[65] Batch[1200] Loss[2.995] [1,0]:INFO:root:Epoch[65] Rank[0] Batch[1200] rmse=0.021331 lr=0.350090 [1,0]:INFO:root:Epoch[65] Rank[0] Batch[1251] Time cost=399.33 Train-metric=0.021329 [1,0]:INFO:root:Epoch[65] Speed: 3207.94 samples/sec [1,0]:INFO:root:Epoch[66] Batch[100] Loss[3.528] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[100] rmse=0.021123 lr=0.349901 [1,0]:INFO:root:Epoch[66] Batch[200] Loss[5.436] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[200] rmse=0.021089 lr=0.349776 [1,0]:INFO:root:Epoch[66] Batch[300] Loss[3.007] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[300] rmse=0.021116 lr=0.349650 [1,0]:INFO:root:Epoch[66] Batch[400] Loss[5.195] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[400] rmse=0.021180 lr=0.349524 [1,0]:INFO:root:Epoch[66] Batch[500] Loss[3.628] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[500] rmse=0.021173 lr=0.349399 [1,0]:INFO:root:Epoch[66] Batch[600] Loss[3.214] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[600] rmse=0.021191 lr=0.349273 [1,0]:INFO:root:Epoch[66] Batch[700] Loss[4.199] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[700] rmse=0.021228 lr=0.349147 [1,0]:INFO:root:Epoch[66] Batch[800] Loss[4.516] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[800] rmse=0.021232 lr=0.349020 [1,0]:INFO:root:Epoch[66] Batch[900] Loss[4.603] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[900] rmse=0.021223 lr=0.348894 [1,0]:INFO:root:Epoch[66] Batch[1000] Loss[3.082] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[1000] rmse=0.021220 lr=0.348767 [1,0]:INFO:root:Epoch[66] Batch[1100] Loss[5.140] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[1100] rmse=0.021232 lr=0.348641 [1,0]:INFO:root:Epoch[66] Batch[1200] Loss[4.200] [1,0]:INFO:root:Epoch[66] Rank[0] Batch[1200] rmse=0.021255 lr=0.348514 [1,0]:INFO:root:Epoch[66] Rank[0] Batch[1251] Time cost=398.66 Train-metric=0.021269 [1,0]:INFO:root:Epoch[66] Speed: 3213.30 samples/sec [1,0]:INFO:root:Epoch[67] Batch[100] Loss[3.276] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[100] rmse=0.021279 lr=0.348322 [1,0]:INFO:root:Epoch[67] Batch[200] Loss[4.205] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[200] rmse=0.021301 lr=0.348195 [1,0]:INFO:root:Epoch[67] Batch[300] Loss[3.440] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[300] rmse=0.021264 lr=0.348068 [1,0]:INFO:root:Epoch[67] Batch[400] Loss[3.153] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[400] rmse=0.021274 lr=0.347941 [1,0]:INFO:root:Epoch[67] Batch[500] Loss[4.841] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[500] rmse=0.021271 lr=0.347813 [1,0]:INFO:root:Epoch[67] Batch[600] Loss[2.965] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[600] rmse=0.021285 lr=0.347685 [1,0]:INFO:root:Epoch[67] Batch[700] Loss[5.435] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[700] rmse=0.021289 lr=0.347558 [1,0]:INFO:root:Epoch[67] Batch[800] Loss[3.079] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[800] rmse=0.021292 lr=0.347430 [1,0]:INFO:root:Epoch[67] Batch[900] Loss[3.201] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[900] rmse=0.021311 lr=0.347302 [1,0]:INFO:root:Epoch[67] Batch[1000] Loss[5.390] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[1000] rmse=0.021309 lr=0.347174 [1,0]:INFO:root:Epoch[67] Batch[1100] Loss[4.223] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[1100] rmse=0.021308 lr=0.347045 [1,0]:INFO:root:Epoch[67] Batch[1200] Loss[3.049] [1,0]:INFO:root:Epoch[67] Rank[0] Batch[1200] rmse=0.021320 lr=0.346917 [1,0]:INFO:root:Epoch[67] Rank[0] Batch[1251] Time cost=399.24 Train-metric=0.021314 [1,0]:INFO:root:Epoch[67] Speed: 3208.64 samples/sec [1,0]:INFO:root:Epoch[68] Batch[100] Loss[3.093] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[100] rmse=0.021109 lr=0.346723 [1,0]:INFO:root:Epoch[68] Batch[200] Loss[3.422] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[200] rmse=0.021265 lr=0.346594 [1,0]:INFO:root:Epoch[68] Batch[300] Loss[4.928] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[300] rmse=0.021353 lr=0.346465 [1,0]:INFO:root:Epoch[68] Batch[400] Loss[2.947] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[400] rmse=0.021342 lr=0.346336 [1,0]:INFO:root:Epoch[68] Batch[500] Loss[5.116] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[500] rmse=0.021278 lr=0.346207 [1,0]:INFO:root:Epoch[68] Batch[600] Loss[2.848] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[600] rmse=0.021275 lr=0.346078 [1,0]:INFO:root:Epoch[68] Batch[700] Loss[2.993] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[700] rmse=0.021263 lr=0.345948 [1,0]:INFO:root:Epoch[68] Batch[800] Loss[3.255] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[800] rmse=0.021274 lr=0.345819 [1,0]:INFO:root:Epoch[68] Batch[900] Loss[3.690] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[900] rmse=0.021303 lr=0.345689 [1,0]:INFO:root:Epoch[68] Batch[1000] Loss[4.824] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[1000] rmse=0.021289 lr=0.345559 [1,0]:INFO:root:Epoch[68] Batch[1100] Loss[2.946] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[1100] rmse=0.021280 lr=0.345429 [1,0]:INFO:root:Epoch[68] Batch[1200] Loss[3.037] [1,0]:INFO:root:Epoch[68] Rank[0] Batch[1200] rmse=0.021265 lr=0.345299 [1,0]:INFO:root:Epoch[68] Rank[0] Batch[1251] Time cost=400.88 Train-metric=0.021266 [1,0]:INFO:root:Epoch[68] Speed: 3195.50 samples/sec [1,0]:INFO:root:Epoch[69] Batch[100] Loss[3.196] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[100] rmse=0.021378 lr=0.345102 [1,0]:INFO:root:Epoch[69] Batch[200] Loss[3.895] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[200] rmse=0.021290 lr=0.344972 [1,0]:INFO:root:Epoch[69] Batch[300] Loss[3.527] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[300] rmse=0.021307 lr=0.344842 [1,0]:INFO:root:Epoch[69] Batch[400] Loss[3.099] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[400] rmse=0.021281 lr=0.344711 [1,0]:INFO:root:Epoch[69] Batch[500] Loss[3.071] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[500] rmse=0.021276 lr=0.344580 [1,0]:INFO:root:Epoch[69] Batch[600] Loss[5.183] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[600] rmse=0.021281 lr=0.344449 [1,0]:INFO:root:Epoch[69] Batch[700] Loss[5.385] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[700] rmse=0.021276 lr=0.344318 [1,0]:INFO:root:Epoch[69] Batch[800] Loss[5.155] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[800] rmse=0.021296 lr=0.344187 [1,0]:INFO:root:Epoch[69] Batch[900] Loss[3.066] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[900] rmse=0.021322 lr=0.344056 [1,0]:INFO:root:Epoch[69] Batch[1000] Loss[4.413] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[1000] rmse=0.021325 lr=0.343924 [1,0]:INFO:root:Epoch[69] Batch[1100] Loss[2.971] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[1100] rmse=0.021327 lr=0.343793 [1,0]:INFO:root:Epoch[69] Batch[1200] Loss[3.079] [1,0]:INFO:root:Epoch[69] Rank[0] Batch[1200] rmse=0.021323 lr=0.343661 [1,0]:INFO:root:Epoch[69] Rank[0] Batch[1251] Time cost=402.02 Train-metric=0.021322 [1,0]:INFO:root:Epoch[69] Speed: 3186.47 samples/sec [1,0]:INFO:root:Epoch[69] Rank[0] Validation-accuracy=0.591940 Validation-top_k_accuracy_5=0.828400 [1,0]:INFO:root:Epoch[70] Batch[100] Loss[2.697] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[100] rmse=0.021141 lr=0.343462 [1,0]:INFO:root:Epoch[70] Batch[200] Loss[3.745] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[200] rmse=0.021229 lr=0.343330 [1,0]:INFO:root:Epoch[70] Batch[300] Loss[5.397] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[300] rmse=0.021204 lr=0.343198 [1,0]:INFO:root:Epoch[70] Batch[400] Loss[3.114] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[400] rmse=0.021178 lr=0.343065 [1,0]:INFO:root:Epoch[70] Batch[500] Loss[3.229] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[500] rmse=0.021205 lr=0.342933 [1,0]:INFO:root:Epoch[70] Batch[600] Loss[2.935] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[600] rmse=0.021179 lr=0.342801 [1,0]:INFO:root:Epoch[70] Batch[700] Loss[3.349] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[700] rmse=0.021188 lr=0.342668 [1,0]:INFO:root:Epoch[70] Batch[800] Loss[3.097] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[800] rmse=0.021206 lr=0.342535 [1,0]:INFO:root:Epoch[70] Batch[900] Loss[3.039] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[900] rmse=0.021219 lr=0.342402 [1,0]:INFO:root:Epoch[70] Batch[1000] Loss[2.870] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[1000] rmse=0.021213 lr=0.342269 [1,0]:INFO:root:Epoch[70] Batch[1100] Loss[3.391] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[1100] rmse=0.021221 lr=0.342136 [1,0]:INFO:root:Epoch[70] Batch[1200] Loss[3.243] [1,0]:INFO:root:Epoch[70] Rank[0] Batch[1200] rmse=0.021224 lr=0.342003 [1,0]:INFO:root:Epoch[70] Rank[0] Batch[1251] Time cost=400.80 Train-metric=0.021229 [1,0]:INFO:root:Epoch[70] Speed: 3196.20 samples/sec [1,0]:INFO:root:Epoch[71] Batch[100] Loss[2.844] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[100] rmse=0.021253 lr=0.341801 [1,0]:INFO:root:Epoch[71] Batch[200] Loss[5.391] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[200] rmse=0.021241 lr=0.341668 [1,0]:INFO:root:Epoch[71] Batch[300] Loss[3.576] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[300] rmse=0.021199 lr=0.341534 [1,0]:INFO:root:Epoch[71] Batch[400] Loss[3.064] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[400] rmse=0.021229 lr=0.341400 [1,0]:INFO:root:Epoch[71] Batch[500] Loss[4.663] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[500] rmse=0.021236 lr=0.341266 [1,0]:INFO:root:Epoch[71] Batch[600] Loss[3.194] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[600] rmse=0.021245 lr=0.341132 [1,0]:INFO:root:Epoch[71] Batch[700] Loss[2.812] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[700] rmse=0.021237 lr=0.340998 [1,0]:INFO:root:Epoch[71] Batch[800] Loss[5.257] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[800] rmse=0.021252 lr=0.340863 [1,0]:INFO:root:Epoch[71] Batch[900] Loss[2.988] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[900] rmse=0.021270 lr=0.340729 [1,0]:INFO:root:Epoch[71] Batch[1000] Loss[3.046] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[1000] rmse=0.021283 lr=0.340594 [1,0]:INFO:root:Epoch[71] Batch[1100] Loss[3.510] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[1100] rmse=0.021281 lr=0.340459 [1,0]:INFO:root:Epoch[71] Batch[1200] Loss[2.969] [1,0]:INFO:root:Epoch[71] Rank[0] Batch[1200] rmse=0.021292 lr=0.340324 [1,0]:INFO:root:Epoch[71] Rank[0] Batch[1251] Time cost=405.20 Train-metric=0.021295 [1,0]:INFO:root:Epoch[71] Speed: 3161.46 samples/sec [1,0]:INFO:root:Epoch[72] Batch[100] Loss[3.098] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[100] rmse=0.021329 lr=0.340121 [1,0]:INFO:root:Epoch[72] Batch[200] Loss[2.871] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[200] rmse=0.021146 lr=0.339985 [1,0]:INFO:root:Epoch[72] Batch[300] Loss[5.307] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[300] rmse=0.021173 lr=0.339850 [1,0]:INFO:root:Epoch[72] Batch[400] Loss[2.870] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[400] rmse=0.021171 lr=0.339715 [1,0]:INFO:root:Epoch[72] Batch[500] Loss[4.046] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[500] rmse=0.021166 lr=0.339579 [1,0]:INFO:root:Epoch[72] Batch[600] Loss[3.337] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[600] rmse=0.021182 lr=0.339443 [1,0]:INFO:root:Epoch[72] Batch[700] Loss[3.216] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[700] rmse=0.021196 lr=0.339307 [1,0]:INFO:root:Epoch[72] Batch[800] Loss[3.160] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[800] rmse=0.021190 lr=0.339172 [1,0]:INFO:root:Epoch[72] Batch[900] Loss[2.911] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[900] rmse=0.021172 lr=0.339035 [1,0]:INFO:root:Epoch[72] Batch[1000] Loss[3.244] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[1000] rmse=0.021176 lr=0.338899 [1,0]:INFO:root:Epoch[72] Batch[1100] Loss[3.153] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[1100] rmse=0.021182 lr=0.338763 [1,0]:INFO:root:Epoch[72] Batch[1200] Loss[3.076] [1,0]:INFO:root:Epoch[72] Rank[0] Batch[1200] rmse=0.021200 lr=0.338627 [1,0]:INFO:root:Epoch[72] Rank[0] Batch[1251] Time cost=400.68 Train-metric=0.021204 [1,0]:INFO:root:Epoch[72] Speed: 3197.15 samples/sec [1,0]:INFO:root:Epoch[73] Batch[100] Loss[3.026] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[100] rmse=0.021346 lr=0.338420 [1,0]:INFO:root:Epoch[73] Batch[200] Loss[5.237] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[200] rmse=0.021390 lr=0.338283 [1,0]:INFO:root:Epoch[73] Batch[300] Loss[4.965] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[300] rmse=0.021267 lr=0.338147 [1,0]:INFO:root:Epoch[73] Batch[400] Loss[2.948] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[400] rmse=0.021285 lr=0.338010 [1,0]:INFO:root:Epoch[73] Batch[500] Loss[3.165] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[500] rmse=0.021254 lr=0.337872 [1,0]:INFO:root:Epoch[73] Batch[600] Loss[3.591] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[600] rmse=0.021269 lr=0.337735 [1,0]:INFO:root:Epoch[73] Batch[700] Loss[5.228] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[700] rmse=0.021284 lr=0.337598 [1,0]:INFO:root:Epoch[73] Batch[800] Loss[5.454] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[800] rmse=0.021309 lr=0.337460 [1,0]:INFO:root:Epoch[73] Batch[900] Loss[4.237] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[900] rmse=0.021316 lr=0.337323 [1,0]:INFO:root:Epoch[73] Batch[1000] Loss[2.674] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[1000] rmse=0.021301 lr=0.337185 [1,0]:INFO:root:Epoch[73] Batch[1100] Loss[4.007] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[1100] rmse=0.021310 lr=0.337047 [1,0]:INFO:root:Epoch[73] Batch[1200] Loss[2.827] [1,0]:INFO:root:Epoch[73] Rank[0] Batch[1200] rmse=0.021311 lr=0.336909 [1,0]:INFO:root:Epoch[73] Rank[0] Batch[1251] Time cost=400.25 Train-metric=0.021305 [1,0]:INFO:root:Epoch[73] Speed: 3200.54 samples/sec [1,0]:INFO:root:Epoch[74] Batch[100] Loss[2.757] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[100] rmse=0.021104 lr=0.336701 [1,0]:INFO:root:Epoch[74] Batch[200] Loss[4.952] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[200] rmse=0.021145 lr=0.336562 [1,0]:INFO:root:Epoch[74] Batch[300] Loss[3.083] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[300] rmse=0.021183 lr=0.336424 [1,0]:INFO:root:Epoch[74] Batch[400] Loss[2.756] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[400] rmse=0.021252 lr=0.336285 [1,0]:INFO:root:Epoch[74] Batch[500] Loss[2.939] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[500] rmse=0.021242 lr=0.336147 [1,0]:INFO:root:Epoch[74] Batch[600] Loss[3.494] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[600] rmse=0.021248 lr=0.336008 [1,0]:INFO:root:Epoch[74] Batch[700] Loss[2.777] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[700] rmse=0.021216 lr=0.335869 [1,0]:INFO:root:Epoch[74] Batch[800] Loss[4.254] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[800] rmse=0.021236 lr=0.335730 [1,0]:INFO:root:Epoch[74] Batch[900] Loss[3.102] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[900] rmse=0.021231 lr=0.335591 [1,0]:INFO:root:Epoch[74] Batch[1000] Loss[2.863] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[1000] rmse=0.021218 lr=0.335451 [1,0]:INFO:root:Epoch[74] Batch[1100] Loss[3.175] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[1100] rmse=0.021231 lr=0.335312 [1,0]:INFO:root:Epoch[74] Batch[1200] Loss[5.522] [1,0]:INFO:root:Epoch[74] Rank[0] Batch[1200] rmse=0.021241 lr=0.335173 [1,0]:INFO:root:Epoch[74] Rank[0] Batch[1251] Time cost=399.05 Train-metric=0.021237 [1,0]:INFO:root:Epoch[74] Speed: 3210.22 samples/sec [1,0]:INFO:root:Epoch[74] Rank[0] Validation-accuracy=0.585600 Validation-top_k_accuracy_5=0.821700 [1,0]:INFO:root:Epoch[75] Batch[100] Loss[2.875] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[100] rmse=0.021281 lr=0.334962 [1,0]:INFO:root:Epoch[75] Batch[200] Loss[3.123] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[200] rmse=0.021141 lr=0.334822 [1,0]:INFO:root:Epoch[75] Batch[300] Loss[2.793] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[300] rmse=0.021136 lr=0.334682 [1,0]:INFO:root:Epoch[75] Batch[400] Loss[2.995] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[400] rmse=0.021057 lr=0.334542 [1,0]:INFO:root:Epoch[75] Batch[500] Loss[3.086] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[500] rmse=0.021140 lr=0.334402 [1,0]:INFO:root:Epoch[75] Batch[600] Loss[3.471] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[600] rmse=0.021142 lr=0.334261 [1,0]:INFO:root:Epoch[75] Batch[700] Loss[3.012] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[700] rmse=0.021153 lr=0.334121 [1,0]:INFO:root:Epoch[75] Batch[800] Loss[5.481] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[800] rmse=0.021148 lr=0.333980 [1,0]:INFO:root:Epoch[75] Batch[900] Loss[3.524] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[900] rmse=0.021164 lr=0.333840 [1,0]:INFO:root:Epoch[75] Batch[1000] Loss[3.039] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[1000] rmse=0.021184 lr=0.333699 [1,0]:INFO:root:Epoch[75] Batch[1100] Loss[5.142] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[1100] rmse=0.021169 lr=0.333558 [1,0]:INFO:root:Epoch[75] Batch[1200] Loss[3.558] [1,0]:INFO:root:Epoch[75] Rank[0] Batch[1200] rmse=0.021163 lr=0.333417 [1,0]:INFO:root:Epoch[75] Rank[0] Batch[1251] Time cost=398.86 Train-metric=0.021176 [1,0]:INFO:root:Epoch[75] Speed: 3211.74 samples/sec [1,0]:INFO:root:Epoch[76] Batch[100] Loss[3.200] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[100] rmse=0.021257 lr=0.333204 [1,0]:INFO:root:Epoch[76] Batch[200] Loss[3.357] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[200] rmse=0.021359 lr=0.333063 [1,0]:INFO:root:Epoch[76] Batch[300] Loss[4.021] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[300] rmse=0.021267 lr=0.332921 [1,0]:INFO:root:Epoch[76] Batch[400] Loss[2.919] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[400] rmse=0.021288 lr=0.332780 [1,0]:INFO:root:Epoch[76] Batch[500] Loss[5.363] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[500] rmse=0.021243 lr=0.332638 [1,0]:INFO:root:Epoch[76] Batch[600] Loss[5.322] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[600] rmse=0.021244 lr=0.332496 [1,0]:INFO:root:Epoch[76] Batch[700] Loss[3.313] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[700] rmse=0.021243 lr=0.332354 [1,0]:INFO:root:Epoch[76] Batch[800] Loss[5.376] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[800] rmse=0.021232 lr=0.332212 [1,0]:INFO:root:Epoch[76] Batch[900] Loss[3.013] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[900] rmse=0.021232 lr=0.332070 [1,0]:INFO:root:Epoch[76] Batch[1000] Loss[5.460] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[1000] rmse=0.021251 lr=0.331928 [1,0]:INFO:root:Epoch[76] Batch[1100] Loss[5.197] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[1100] rmse=0.021244 lr=0.331785 [1,0]:INFO:root:Epoch[76] Batch[1200] Loss[5.002] [1,0]:INFO:root:Epoch[76] Rank[0] Batch[1200] rmse=0.021256 lr=0.331643 [1,0]:INFO:root:Epoch[76] Rank[0] Batch[1251] Time cost=398.67 Train-metric=0.021262 [1,0]:INFO:root:Epoch[76] Speed: 3213.22 samples/sec [1,0]:INFO:root:Epoch[77] Batch[100] Loss[2.908] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[100] rmse=0.021166 lr=0.331427 [1,0]:INFO:root:Epoch[77] Batch[200] Loss[5.103] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[200] rmse=0.021201 lr=0.331285 [1,0]:INFO:root:Epoch[77] Batch[300] Loss[3.030] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[300] rmse=0.021170 lr=0.331142 [1,0]:INFO:root:Epoch[77] Batch[400] Loss[2.869] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[400] rmse=0.021171 lr=0.330999 [1,0]:INFO:root:Epoch[77] Batch[500] Loss[2.973] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[500] rmse=0.021144 lr=0.330855 [1,0]:INFO:root:Epoch[77] Batch[600] Loss[3.242] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[600] rmse=0.021138 lr=0.330712 [1,0]:INFO:root:Epoch[77] Batch[700] Loss[5.130] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[700] rmse=0.021168 lr=0.330569 [1,0]:INFO:root:Epoch[77] Batch[800] Loss[3.109] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[800] rmse=0.021173 lr=0.330425 [1,0]:INFO:root:Epoch[77] Batch[900] Loss[4.981] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[900] rmse=0.021187 lr=0.330282 [1,0]:INFO:root:Epoch[77] Batch[1000] Loss[5.067] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[1000] rmse=0.021217 lr=0.330138 [1,0]:INFO:root:Epoch[77] Batch[1100] Loss[4.083] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[1100] rmse=0.021198 lr=0.329994 [1,0]:INFO:root:Epoch[77] Batch[1200] Loss[3.672] [1,0]:INFO:root:Epoch[77] Rank[0] Batch[1200] rmse=0.021196 lr=0.329850 [1,0]:INFO:root:Epoch[77] Rank[0] Batch[1251] Time cost=399.88 Train-metric=0.021192 [1,0]:INFO:root:Epoch[77] Speed: 3203.48 samples/sec [1,0]:INFO:root:Epoch[78] Batch[100] Loss[2.922] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[100] rmse=0.021180 lr=0.329632 [1,0]:INFO:root:Epoch[78] Batch[200] Loss[2.858] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[200] rmse=0.021156 lr=0.329488 [1,0]:INFO:root:Epoch[78] Batch[300] Loss[3.763] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[300] rmse=0.021203 lr=0.329344 [1,0]:INFO:root:Epoch[78] Batch[400] Loss[2.992] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[400] rmse=0.021215 lr=0.329199 [1,0]:INFO:root:Epoch[78] Batch[500] Loss[2.976] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[500] rmse=0.021255 lr=0.329055 [1,0]:INFO:root:Epoch[78] Batch[600] Loss[3.264] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[600] rmse=0.021249 lr=0.328910 [1,0]:INFO:root:Epoch[78] Batch[700] Loss[3.068] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[700] rmse=0.021248 lr=0.328765 [1,0]:INFO:root:Epoch[78] Batch[800] Loss[2.909] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[800] rmse=0.021260 lr=0.328620 [1,0]:INFO:root:Epoch[78] Batch[900] Loss[3.494] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[900] rmse=0.021247 lr=0.328475 [1,0]:INFO:root:Epoch[78] Batch[1000] Loss[4.751] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[1000] rmse=0.021256 lr=0.328330 [1,0]:INFO:root:Epoch[78] Batch[1100] Loss[2.974] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[1100] rmse=0.021255 lr=0.328184 [1,0]:INFO:root:Epoch[78] Batch[1200] Loss[2.889] [1,0]:INFO:root:Epoch[78] Rank[0] Batch[1200] rmse=0.021259 lr=0.328039 [1,0]:INFO:root:Epoch[78] Rank[0] Batch[1251] Time cost=399.02 Train-metric=0.021267 [1,0]:INFO:root:Epoch[78] Speed: 3210.41 samples/sec [1,0]:INFO:root:Epoch[79] Batch[100] Loss[2.936] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[100] rmse=0.021078 lr=0.327819 [1,0]:INFO:root:Epoch[79] Batch[200] Loss[3.049] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[200] rmse=0.021160 lr=0.327674 [1,0]:INFO:root:Epoch[79] Batch[300] Loss[3.479] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[300] rmse=0.021093 lr=0.327528 [1,0]:INFO:root:Epoch[79] Batch[400] Loss[4.938] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[400] rmse=0.021120 lr=0.327382 [1,0]:INFO:root:Epoch[79] Batch[500] Loss[3.743] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[500] rmse=0.021177 lr=0.327236 [1,0]:INFO:root:Epoch[79] Batch[600] Loss[4.719] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[600] rmse=0.021139 lr=0.327090 [1,0]:INFO:root:Epoch[79] Batch[700] Loss[3.353] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[700] rmse=0.021146 lr=0.326943 [1,0]:INFO:root:Epoch[79] Batch[800] Loss[5.276] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[800] rmse=0.021179 lr=0.326797 [1,0]:INFO:root:Epoch[79] Batch[900] Loss[3.222] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[900] rmse=0.021170 lr=0.326650 [1,0]:INFO:root:Epoch[79] Batch[1000] Loss[3.096] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[1000] rmse=0.021182 lr=0.326504 [1,0]:INFO:root:Epoch[79] Batch[1100] Loss[5.058] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[1100] rmse=0.021200 lr=0.326357 [1,0]:INFO:root:Epoch[79] Batch[1200] Loss[3.500] [1,0]:INFO:root:Epoch[79] Rank[0] Batch[1200] rmse=0.021190 lr=0.326210 [1,0]:INFO:root:Epoch[79] Rank[0] Batch[1251] Time cost=399.34 Train-metric=0.021195 [1,0]:INFO:root:Epoch[79] Speed: 3207.89 samples/sec [1,0]:INFO:root:Epoch[79] Rank[0] Validation-accuracy=0.598280 Validation-top_k_accuracy_5=0.833720 [1,0]:INFO:root:Epoch[80] Batch[100] Loss[4.197] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[100] rmse=0.021067 lr=0.325988 [1,0]:INFO:root:Epoch[80] Batch[200] Loss[3.863] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[200] rmse=0.021048 lr=0.325841 [1,0]:INFO:root:Epoch[80] Batch[300] Loss[2.943] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[300] rmse=0.020991 lr=0.325694 [1,0]:INFO:root:Epoch[80] Batch[400] Loss[2.894] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[400] rmse=0.021035 lr=0.325546 [1,0]:INFO:root:Epoch[80] Batch[500] Loss[5.414] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[500] rmse=0.021080 lr=0.325399 [1,0]:INFO:root:Epoch[80] Batch[600] Loss[2.842] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[600] rmse=0.021117 lr=0.325251 [1,0]:INFO:root:Epoch[80] Batch[700] Loss[3.288] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[700] rmse=0.021099 lr=0.325104 [1,0]:INFO:root:Epoch[80] Batch[800] Loss[2.980] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[800] rmse=0.021108 lr=0.324956 [1,0]:INFO:root:Epoch[80] Batch[900] Loss[3.093] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[900] rmse=0.021132 lr=0.324808 [1,0]:INFO:root:Epoch[80] Batch[1000] Loss[5.229] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[1000] rmse=0.021170 lr=0.324660 [1,0]:INFO:root:Epoch[80] Batch[1100] Loss[2.964] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[1100] rmse=0.021163 lr=0.324512 [1,0]:INFO:root:Epoch[80] Batch[1200] Loss[3.075] [1,0]:INFO:root:Epoch[80] Rank[0] Batch[1200] rmse=0.021178 lr=0.324363 [1,0]:INFO:root:Epoch[80] Rank[0] Batch[1251] Time cost=398.42 Train-metric=0.021183 [1,0]:INFO:root:Epoch[80] Speed: 3215.30 samples/sec [1,0]:INFO:root:Epoch[81] Batch[100] Loss[2.797] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[100] rmse=0.021185 lr=0.324139 [1,0]:INFO:root:Epoch[81] Batch[200] Loss[2.879] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[200] rmse=0.021193 lr=0.323991 [1,0]:INFO:root:Epoch[81] Batch[300] Loss[5.346] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[300] rmse=0.021122 lr=0.323842 [1,0]:INFO:root:Epoch[81] Batch[400] Loss[3.350] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[400] rmse=0.021106 lr=0.323693 [1,0]:INFO:root:Epoch[81] Batch[500] Loss[3.180] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[500] rmse=0.021121 lr=0.323545 [1,0]:INFO:root:Epoch[81] Batch[600] Loss[3.052] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[600] rmse=0.021110 lr=0.323396 [1,0]:INFO:root:Epoch[81] Batch[700] Loss[4.462] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[700] rmse=0.021128 lr=0.323247 [1,0]:INFO:root:Epoch[81] Batch[800] Loss[3.103] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[800] rmse=0.021142 lr=0.323097 [1,0]:INFO:root:Epoch[81] Batch[900] Loss[4.484] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[900] rmse=0.021144 lr=0.322948 [1,0]:INFO:root:Epoch[81] Batch[1000] Loss[3.138] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[1000] rmse=0.021140 lr=0.322799 [1,0]:INFO:root:Epoch[81] Batch[1100] Loss[2.672] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[1100] rmse=0.021130 lr=0.322649 [1,0]:INFO:root:Epoch[81] Batch[1200] Loss[5.064] [1,0]:INFO:root:Epoch[81] Rank[0] Batch[1200] rmse=0.021145 lr=0.322499 [1,0]:INFO:root:Epoch[81] Rank[0] Batch[1251] Time cost=399.23 Train-metric=0.021153 [1,0]:INFO:root:Epoch[81] Speed: 3208.72 samples/sec [1,0]:INFO:root:Epoch[82] Batch[100] Loss[4.081] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[100] rmse=0.021187 lr=0.322273 [1,0]:INFO:root:Epoch[82] Batch[200] Loss[3.920] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[200] rmse=0.021152 lr=0.322123 [1,0]:INFO:root:Epoch[82] Batch[300] Loss[3.294] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[300] rmse=0.021132 lr=0.321973 [1,0]:INFO:root:Epoch[82] Batch[400] Loss[3.689] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[400] rmse=0.021157 lr=0.321823 [1,0]:INFO:root:Epoch[82] Batch[500] Loss[2.954] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[500] rmse=0.021180 lr=0.321673 [1,0]:INFO:root:Epoch[82] Batch[600] Loss[5.294] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[600] rmse=0.021217 lr=0.321523 [1,0]:INFO:root:Epoch[82] Batch[700] Loss[2.972] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[700] rmse=0.021237 lr=0.321372 [1,0]:INFO:root:Epoch[82] Batch[800] Loss[2.834] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[800] rmse=0.021238 lr=0.321221 [1,0]:INFO:root:Epoch[82] Batch[900] Loss[3.453] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[900] rmse=0.021245 lr=0.321071 [1,0]:INFO:root:Epoch[82] Batch[1000] Loss[3.437] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[1000] rmse=0.021245 lr=0.320920 [1,0]:INFO:root:Epoch[82] Batch[1100] Loss[3.439] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[1100] rmse=0.021233 lr=0.320769 [1,0]:INFO:root:Epoch[82] Batch[1200] Loss[3.055] [1,0]:INFO:root:Epoch[82] Rank[0] Batch[1200] rmse=0.021236 lr=0.320618 [1,0]:INFO:root:Epoch[82] Rank[0] Batch[1251] Time cost=399.47 Train-metric=0.021230 [1,0]:INFO:root:Epoch[82] Speed: 3206.77 samples/sec [1,0]:INFO:root:Epoch[83] Batch[100] Loss[2.815] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[100] rmse=0.021168 lr=0.320390 [1,0]:INFO:root:Epoch[83] Batch[200] Loss[3.359] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[200] rmse=0.021143 lr=0.320239 [1,0]:INFO:root:Epoch[83] Batch[300] Loss[3.578] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[300] rmse=0.021122 lr=0.320087 [1,0]:INFO:root:Epoch[83] Batch[400] Loss[2.720] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[400] rmse=0.021152 lr=0.319936 [1,0]:INFO:root:Epoch[83] Batch[500] Loss[2.616] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[500] rmse=0.021107 lr=0.319784 [1,0]:INFO:root:Epoch[83] Batch[600] Loss[4.774] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[600] rmse=0.021123 lr=0.319632 [1,0]:INFO:root:Epoch[83] Batch[700] Loss[4.192] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[700] rmse=0.021113 lr=0.319481 [1,0]:INFO:root:Epoch[83] Batch[800] Loss[2.774] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[800] rmse=0.021117 lr=0.319329 [1,0]:INFO:root:Epoch[83] Batch[900] Loss[2.820] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[900] rmse=0.021137 lr=0.319177 [1,0]:INFO:root:Epoch[83] Batch[1000] Loss[3.955] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[1000] rmse=0.021143 lr=0.319025 [1,0]:INFO:root:Epoch[83] Batch[1100] Loss[2.662] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[1100] rmse=0.021131 lr=0.318872 [1,0]:INFO:root:Epoch[83] Batch[1200] Loss[3.917] [1,0]:INFO:root:Epoch[83] Rank[0] Batch[1200] rmse=0.021122 lr=0.318720 [1,0]:INFO:root:Epoch[83] Rank[0] Batch[1251] Time cost=398.98 Train-metric=0.021127 [1,0]:INFO:root:Epoch[83] Speed: 3210.76 samples/sec [1,0]:INFO:root:Epoch[84] Batch[100] Loss[3.633] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[100] rmse=0.021126 lr=0.318490 [1,0]:INFO:root:Epoch[84] Batch[200] Loss[3.567] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[200] rmse=0.021088 lr=0.318337 [1,0]:INFO:root:Epoch[84] Batch[300] Loss[2.859] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[300] rmse=0.021114 lr=0.318184 [1,0]:INFO:root:Epoch[84] Batch[400] Loss[2.840] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[400] rmse=0.021110 lr=0.318031 [1,0]:INFO:root:Epoch[84] Batch[500] Loss[2.905] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[500] rmse=0.021117 lr=0.317879 [1,0]:INFO:root:Epoch[84] Batch[600] Loss[4.889] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[600] rmse=0.021101 lr=0.317726 [1,0]:INFO:root:Epoch[84] Batch[700] Loss[3.141] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[700] rmse=0.021116 lr=0.317572 [1,0]:INFO:root:Epoch[84] Batch[800] Loss[5.414] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[800] rmse=0.021109 lr=0.317419 [1,0]:INFO:root:Epoch[84] Batch[900] Loss[2.854] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[900] rmse=0.021110 lr=0.317266 [1,0]:INFO:root:Epoch[84] Batch[1000] Loss[3.486] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[1000] rmse=0.021119 lr=0.317112 [1,0]:INFO:root:Epoch[84] Batch[1100] Loss[4.454] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[1100] rmse=0.021114 lr=0.316959 [1,0]:INFO:root:Epoch[84] Batch[1200] Loss[3.208] [1,0]:INFO:root:Epoch[84] Rank[0] Batch[1200] rmse=0.021132 lr=0.316805 [1,0]:INFO:root:Epoch[84] Rank[0] Batch[1251] Time cost=398.99 Train-metric=0.021144 [1,0]:INFO:root:Epoch[84] Speed: 3210.63 samples/sec [1,0]:INFO:root:Epoch[84] Rank[0] Validation-accuracy=0.599040 Validation-top_k_accuracy_5=0.832480 [1,0]:INFO:root:Epoch[85] Batch[100] Loss[3.041] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[100] rmse=0.021189 lr=0.316573 [1,0]:INFO:root:Epoch[85] Batch[200] Loss[2.737] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[200] rmse=0.021054 lr=0.316419 [1,0]:INFO:root:Epoch[85] Batch[300] Loss[5.308] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[300] rmse=0.021059 lr=0.316265 [1,0]:INFO:root:Epoch[85] Batch[400] Loss[3.082] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[400] rmse=0.021097 lr=0.316111 [1,0]:INFO:root:Epoch[85] Batch[500] Loss[3.265] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[500] rmse=0.021089 lr=0.315956 [1,0]:INFO:root:Epoch[85] Batch[600] Loss[4.776] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[600] rmse=0.021088 lr=0.315802 [1,0]:INFO:root:Epoch[85] Batch[700] Loss[3.337] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[700] rmse=0.021093 lr=0.315648 [1,0]:INFO:root:Epoch[85] Batch[800] Loss[4.677] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[800] rmse=0.021109 lr=0.315493 [1,0]:INFO:root:Epoch[85] Batch[900] Loss[3.128] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[900] rmse=0.021064 lr=0.315338 [1,0]:INFO:root:Epoch[85] Batch[1000] Loss[2.987] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[1000] rmse=0.021099 lr=0.315184 [1,0]:INFO:root:Epoch[85] Batch[1100] Loss[3.727] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[1100] rmse=0.021108 lr=0.315029 [1,0]:INFO:root:Epoch[85] Batch[1200] Loss[4.450] [1,0]:INFO:root:Epoch[85] Rank[0] Batch[1200] rmse=0.021108 lr=0.314874 [1,0]:INFO:root:Epoch[85] Rank[0] Batch[1251] Time cost=399.15 Train-metric=0.021105 [1,0]:INFO:root:Epoch[85] Speed: 3209.41 samples/sec [1,0]:INFO:root:Epoch[86] Batch[100] Loss[3.561] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[100] rmse=0.021343 lr=0.314640 [1,0]:INFO:root:Epoch[86] Batch[200] Loss[2.786] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[200] rmse=0.021185 lr=0.314484 [1,0]:INFO:root:Epoch[86] Batch[300] Loss[2.870] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[300] rmse=0.021168 lr=0.314329 [1,0]:INFO:root:Epoch[86] Batch[400] Loss[5.382] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[400] rmse=0.021106 lr=0.314174 [1,0]:INFO:root:Epoch[86] Batch[500] Loss[2.655] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[500] rmse=0.021116 lr=0.314018 [1,0]:INFO:root:Epoch[86] Batch[600] Loss[3.329] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[600] rmse=0.021153 lr=0.313862 [1,0]:INFO:root:Epoch[86] Batch[700] Loss[2.816] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[700] rmse=0.021142 lr=0.313707 [1,0]:INFO:root:Epoch[86] Batch[800] Loss[2.830] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[800] rmse=0.021108 lr=0.313551 [1,0]:INFO:root:Epoch[86] Batch[900] Loss[3.127] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[900] rmse=0.021127 lr=0.313395 [1,0]:INFO:root:Epoch[86] Batch[1000] Loss[4.704] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[1000] rmse=0.021137 lr=0.313239 [1,0]:INFO:root:Epoch[86] Batch[1100] Loss[3.115] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[1100] rmse=0.021122 lr=0.313083 [1,0]:INFO:root:Epoch[86] Batch[1200] Loss[2.784] [1,0]:INFO:root:Epoch[86] Rank[0] Batch[1200] rmse=0.021115 lr=0.312926 [1,0]:INFO:root:Epoch[86] Rank[0] Batch[1251] Time cost=399.45 Train-metric=0.021104 [1,0]:INFO:root:Epoch[86] Speed: 3206.97 samples/sec [1,0]:INFO:root:Epoch[87] Batch[100] Loss[3.440] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[100] rmse=0.020928 lr=0.312690 [1,0]:INFO:root:Epoch[87] Batch[200] Loss[3.104] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[200] rmse=0.020948 lr=0.312534 [1,0]:INFO:root:Epoch[87] Batch[300] Loss[2.993] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[300] rmse=0.021046 lr=0.312377 [1,0]:INFO:root:Epoch[87] Batch[400] Loss[3.035] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[400] rmse=0.021118 lr=0.312220 [1,0]:INFO:root:Epoch[87] Batch[500] Loss[3.513] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[500] rmse=0.021149 lr=0.312064 [1,0]:INFO:root:Epoch[87] Batch[600] Loss[2.867] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[600] rmse=0.021207 lr=0.311907 [1,0]:INFO:root:Epoch[87] Batch[700] Loss[3.032] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[700] rmse=0.021212 lr=0.311750 [1,0]:INFO:root:Epoch[87] Batch[800] Loss[2.909] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[800] rmse=0.021186 lr=0.311593 [1,0]:INFO:root:Epoch[87] Batch[900] Loss[5.385] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[900] rmse=0.021146 lr=0.311435 [1,0]:INFO:root:Epoch[87] Batch[1000] Loss[4.886] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[1000] rmse=0.021137 lr=0.311278 [1,0]:INFO:root:Epoch[87] Batch[1100] Loss[4.888] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[1100] rmse=0.021135 lr=0.311121 [1,0]:INFO:root:Epoch[87] Batch[1200] Loss[3.017] [1,0]:INFO:root:Epoch[87] Rank[0] Batch[1200] rmse=0.021142 lr=0.310963 [1,0]:INFO:root:Epoch[87] Rank[0] Batch[1251] Time cost=399.40 Train-metric=0.021155 [1,0]:INFO:root:Epoch[87] Speed: 3207.39 samples/sec [1,0]:INFO:root:Epoch[88] Batch[100] Loss[3.124] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[100] rmse=0.021127 lr=0.310725 [1,0]:INFO:root:Epoch[88] Batch[200] Loss[3.254] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[200] rmse=0.021205 lr=0.310567 [1,0]:INFO:root:Epoch[88] Batch[300] Loss[5.539] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[300] rmse=0.021169 lr=0.310410 [1,0]:INFO:root:Epoch[88] Batch[400] Loss[3.037] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[400] rmse=0.021135 lr=0.310252 [1,0]:INFO:root:Epoch[88] Batch[500] Loss[3.252] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[500] rmse=0.021118 lr=0.310094 [1,0]:INFO:root:Epoch[88] Batch[600] Loss[3.407] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[600] rmse=0.021088 lr=0.309935 [1,0]:INFO:root:Epoch[88] Batch[700] Loss[4.120] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[700] rmse=0.021089 lr=0.309777 [1,0]:INFO:root:Epoch[88] Batch[800] Loss[5.405] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[800] rmse=0.021091 lr=0.309619 [1,0]:INFO:root:Epoch[88] Batch[900] Loss[2.732] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[900] rmse=0.021085 lr=0.309460 [1,0]:INFO:root:Epoch[88] Batch[1000] Loss[3.361] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[1000] rmse=0.021095 lr=0.309302 [1,0]:INFO:root:Epoch[88] Batch[1100] Loss[3.453] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[1100] rmse=0.021096 lr=0.309143 [1,0]:INFO:root:Epoch[88] Batch[1200] Loss[2.680] [1,0]:INFO:root:Epoch[88] Rank[0] Batch[1200] rmse=0.021111 lr=0.308984 [1,0]:INFO:root:Epoch[88] Rank[0] Batch[1251] Time cost=399.46 Train-metric=0.021113 [1,0]:INFO:root:Epoch[88] Speed: 3206.92 samples/sec [1,0]:INFO:root:Epoch[89] Batch[100] Loss[5.354] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[100] rmse=0.021053 lr=0.308745 [1,0]:INFO:root:Epoch[89] Batch[200] Loss[2.837] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[200] rmse=0.021075 lr=0.308586 [1,0]:INFO:root:Epoch[89] Batch[300] Loss[2.869] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[300] rmse=0.021129 lr=0.308426 [1,0]:INFO:root:Epoch[89] Batch[400] Loss[3.302] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[400] rmse=0.021091 lr=0.308267 [1,0]:INFO:root:Epoch[89] Batch[500] Loss[3.008] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[500] rmse=0.021068 lr=0.308108 [1,0]:INFO:root:Epoch[89] Batch[600] Loss[4.943] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[600] rmse=0.021073 lr=0.307949 [1,0]:INFO:root:Epoch[89] Batch[700] Loss[4.848] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[700] rmse=0.021105 lr=0.307789 [1,0]:INFO:root:Epoch[89] Batch[800] Loss[3.206] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[800] rmse=0.021089 lr=0.307630 [1,0]:INFO:root:Epoch[89] Batch[900] Loss[3.593] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[900] rmse=0.021099 lr=0.307470 [1,0]:INFO:root:Epoch[89] Batch[1000] Loss[3.011] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[1000] rmse=0.021102 lr=0.307310 [1,0]:INFO:root:Epoch[89] Batch[1100] Loss[2.955] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[1100] rmse=0.021106 lr=0.307150 [1,0]:INFO:root:Epoch[89] Batch[1200] Loss[3.199] [1,0]:INFO:root:Epoch[89] Rank[0] Batch[1200] rmse=0.021128 lr=0.306990 [1,0]:INFO:root:Epoch[89] Rank[0] Batch[1251] Time cost=399.32 Train-metric=0.021124 [1,0]:INFO:root:Epoch[89] Speed: 3208.03 samples/sec [1,0]:INFO:root:Epoch[89] Rank[0] Validation-accuracy=0.588660 Validation-top_k_accuracy_5=0.822760 [1,0]:INFO:root:Epoch[90] Batch[100] Loss[3.013] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[100] rmse=0.020665 lr=0.306749 [1,0]:INFO:root:Epoch[90] Batch[200] Loss[3.856] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[200] rmse=0.020732 lr=0.306588 [1,0]:INFO:root:Epoch[90] Batch[300] Loss[4.015] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[300] rmse=0.020917 lr=0.306428 [1,0]:INFO:root:Epoch[90] Batch[400] Loss[4.100] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[400] rmse=0.020943 lr=0.306268 [1,0]:INFO:root:Epoch[90] Batch[500] Loss[2.759] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[500] rmse=0.021011 lr=0.306107 [1,0]:INFO:root:Epoch[90] Batch[600] Loss[5.037] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[600] rmse=0.021022 lr=0.305947 [1,0]:INFO:root:Epoch[90] Batch[700] Loss[3.168] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[700] rmse=0.021026 lr=0.305786 [1,0]:INFO:root:Epoch[90] Batch[800] Loss[2.803] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[800] rmse=0.021058 lr=0.305625 [1,0]:INFO:root:Epoch[90] Batch[900] Loss[5.361] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[900] rmse=0.021074 lr=0.305464 [1,0]:INFO:root:Epoch[90] Batch[1000] Loss[2.880] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[1000] rmse=0.021083 lr=0.305303 [1,0]:INFO:root:Epoch[90] Batch[1100] Loss[4.901] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[1100] rmse=0.021079 lr=0.305142 [1,0]:INFO:root:Epoch[90] Batch[1200] Loss[5.329] [1,0]:INFO:root:Epoch[90] Rank[0] Batch[1200] rmse=0.021100 lr=0.304981 [1,0]:INFO:root:Epoch[90] Rank[0] Batch[1251] Time cost=398.93 Train-metric=0.021105 [1,0]:INFO:root:Epoch[90] Speed: 3211.12 samples/sec [1,0]:INFO:root:Epoch[91] Batch[100] Loss[2.812] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[100] rmse=0.021068 lr=0.304738 [1,0]:INFO:root:Epoch[91] Batch[200] Loss[5.519] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[200] rmse=0.020965 lr=0.304576 [1,0]:INFO:root:Epoch[91] Batch[300] Loss[3.159] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[300] rmse=0.021001 lr=0.304415 [1,0]:INFO:root:Epoch[91] Batch[400] Loss[3.083] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[400] rmse=0.020997 lr=0.304253 [1,0]:INFO:root:Epoch[91] Batch[500] Loss[3.119] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[500] rmse=0.021041 lr=0.304092 [1,0]:INFO:root:Epoch[91] Batch[600] Loss[3.022] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[600] rmse=0.021061 lr=0.303930 [1,0]:INFO:root:Epoch[91] Batch[700] Loss[2.874] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[700] rmse=0.021086 lr=0.303768 [1,0]:INFO:root:Epoch[91] Batch[800] Loss[4.648] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[800] rmse=0.021103 lr=0.303606 [1,0]:INFO:root:Epoch[91] Batch[900] Loss[3.020] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[900] rmse=0.021104 lr=0.303444 [1,0]:INFO:root:Epoch[91] Batch[1000] Loss[5.217] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[1000] rmse=0.021101 lr=0.303282 [1,0]:INFO:root:Epoch[91] Batch[1100] Loss[4.042] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[1100] rmse=0.021112 lr=0.303120 [1,0]:INFO:root:Epoch[91] Batch[1200] Loss[2.755] [1,0]:INFO:root:Epoch[91] Rank[0] Batch[1200] rmse=0.021115 lr=0.302957 [1,0]:INFO:root:Epoch[91] Rank[0] Batch[1251] Time cost=399.44 Train-metric=0.021116 [1,0]:INFO:root:Epoch[91] Speed: 3207.06 samples/sec [1,0]:INFO:root:Epoch[92] Batch[100] Loss[4.780] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[100] rmse=0.020955 lr=0.302712 [1,0]:INFO:root:Epoch[92] Batch[200] Loss[5.159] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[200] rmse=0.021030 lr=0.302550 [1,0]:INFO:root:Epoch[92] Batch[300] Loss[2.868] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[300] rmse=0.021046 lr=0.302387 [1,0]:INFO:root:Epoch[92] Batch[400] Loss[2.700] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[400] rmse=0.021053 lr=0.302224 [1,0]:INFO:root:Epoch[92] Batch[500] Loss[3.045] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[500] rmse=0.021066 lr=0.302061 [1,0]:INFO:root:Epoch[92] Batch[600] Loss[4.004] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[600] rmse=0.021088 lr=0.301899 [1,0]:INFO:root:Epoch[92] Batch[700] Loss[5.272] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[700] rmse=0.021082 lr=0.301736 [1,0]:INFO:root:Epoch[92] Batch[800] Loss[5.314] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[800] rmse=0.021088 lr=0.301572 [1,0]:INFO:root:Epoch[92] Batch[900] Loss[3.158] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[900] rmse=0.021107 lr=0.301409 [1,0]:INFO:root:Epoch[92] Batch[1000] Loss[3.115] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[1000] rmse=0.021106 lr=0.301246 [1,0]:INFO:root:Epoch[92] Batch[1100] Loss[5.352] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[1100] rmse=0.021111 lr=0.301083 [1,0]:INFO:root:Epoch[92] Batch[1200] Loss[2.966] [1,0]:INFO:root:Epoch[92] Rank[0] Batch[1200] rmse=0.021107 lr=0.300919 [1,0]:INFO:root:Epoch[92] Rank[0] Batch[1251] Time cost=398.48 Train-metric=0.021107 [1,0]:INFO:root:Epoch[92] Speed: 3214.81 samples/sec [1,0]:INFO:root:Epoch[93] Batch[100] Loss[4.892] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[100] rmse=0.020893 lr=0.300672 [1,0]:INFO:root:Epoch[93] Batch[200] Loss[2.842] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[200] rmse=0.020826 lr=0.300509 [1,0]:INFO:root:Epoch[93] Batch[300] Loss[3.177] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[300] rmse=0.020920 lr=0.300345 [1,0]:INFO:root:Epoch[93] Batch[400] Loss[3.029] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[400] rmse=0.020930 lr=0.300181 [1,0]:INFO:root:Epoch[93] Batch[500] Loss[2.920] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[500] rmse=0.020927 lr=0.300017 [1,0]:INFO:root:Epoch[93] Batch[600] Loss[3.756] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[600] rmse=0.020962 lr=0.299853 [1,0]:INFO:root:Epoch[93] Batch[700] Loss[3.454] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[700] rmse=0.020983 lr=0.299689 [1,0]:INFO:root:Epoch[93] Batch[800] Loss[2.715] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[800] rmse=0.021001 lr=0.299525 [1,0]:INFO:root:Epoch[93] Batch[900] Loss[3.110] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[900] rmse=0.021040 lr=0.299360 [1,0]:INFO:root:Epoch[93] Batch[1000] Loss[3.830] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[1000] rmse=0.021056 lr=0.299196 [1,0]:INFO:root:Epoch[93] Batch[1100] Loss[3.052] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[1100] rmse=0.021071 lr=0.299031 [1,0]:INFO:root:Epoch[93] Batch[1200] Loss[4.080] [1,0]:INFO:root:Epoch[93] Rank[0] Batch[1200] rmse=0.021080 lr=0.298867 [1,0]:INFO:root:Epoch[93] Rank[0] Batch[1251] Time cost=399.19 Train-metric=0.021077 [1,0]:INFO:root:Epoch[93] Speed: 3209.09 samples/sec [1,0]:INFO:root:Epoch[94] Batch[100] Loss[3.244] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[100] rmse=0.021015 lr=0.298618 [1,0]:INFO:root:Epoch[94] Batch[200] Loss[3.181] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[200] rmse=0.020889 lr=0.298453 [1,0]:INFO:root:Epoch[94] Batch[300] Loss[2.910] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[300] rmse=0.021017 lr=0.298288 [1,0]:INFO:root:Epoch[94] Batch[400] Loss[2.681] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[400] rmse=0.021002 lr=0.298123 [1,0]:INFO:root:Epoch[94] Batch[500] Loss[2.796] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[500] rmse=0.021075 lr=0.297958 [1,0]:INFO:root:Epoch[94] Batch[600] Loss[3.095] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[600] rmse=0.021061 lr=0.297793 [1,0]:INFO:root:Epoch[94] Batch[700] Loss[3.247] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[700] rmse=0.021052 lr=0.297628 [1,0]:INFO:root:Epoch[94] Batch[800] Loss[2.811] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[800] rmse=0.021055 lr=0.297463 [1,0]:INFO:root:Epoch[94] Batch[900] Loss[2.657] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[900] rmse=0.021072 lr=0.297297 [1,0]:INFO:root:Epoch[94] Batch[1000] Loss[3.087] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[1000] rmse=0.021072 lr=0.297132 [1,0]:INFO:root:Epoch[94] Batch[1100] Loss[2.945] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[1100] rmse=0.021064 lr=0.296966 [1,0]:INFO:root:Epoch[94] Batch[1200] Loss[2.811] [1,0]:INFO:root:Epoch[94] Rank[0] Batch[1200] rmse=0.021061 lr=0.296800 [1,0]:INFO:root:Epoch[94] Rank[0] Batch[1251] Time cost=400.41 Train-metric=0.021064 [1,0]:INFO:root:Epoch[94] Speed: 3199.28 samples/sec [1,0]:INFO:root:Epoch[94] Rank[0] Validation-accuracy=0.604720 Validation-top_k_accuracy_5=0.834480 [1,0]:INFO:root:Epoch[95] Batch[100] Loss[2.679] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[100] rmse=0.020916 lr=0.296550 [1,0]:INFO:root:Epoch[95] Batch[200] Loss[2.686] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[200] rmse=0.020914 lr=0.296384 [1,0]:INFO:root:Epoch[95] Batch[300] Loss[3.007] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[300] rmse=0.020937 lr=0.296218 [1,0]:INFO:root:Epoch[95] Batch[400] Loss[3.187] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[400] rmse=0.020978 lr=0.296052 [1,0]:INFO:root:Epoch[95] Batch[500] Loss[5.196] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[500] rmse=0.021044 lr=0.295886 [1,0]:INFO:root:Epoch[95] Batch[600] Loss[3.143] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[600] rmse=0.021035 lr=0.295720 [1,0]:INFO:root:Epoch[95] Batch[700] Loss[4.565] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[700] rmse=0.021003 lr=0.295553 [1,0]:INFO:root:Epoch[95] Batch[800] Loss[4.031] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[800] rmse=0.021002 lr=0.295387 [1,0]:INFO:root:Epoch[95] Batch[900] Loss[4.993] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[900] rmse=0.020987 lr=0.295221 [1,0]:INFO:root:Epoch[95] Batch[1000] Loss[4.458] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[1000] rmse=0.020991 lr=0.295054 [1,0]:INFO:root:Epoch[95] Batch[1100] Loss[3.731] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[1100] rmse=0.021022 lr=0.294887 [1,0]:INFO:root:Epoch[95] Batch[1200] Loss[2.723] [1,0]:INFO:root:Epoch[95] Rank[0] Batch[1200] rmse=0.021026 lr=0.294721 [1,0]:INFO:root:Epoch[95] Rank[0] Batch[1251] Time cost=402.17 Train-metric=0.021035 [1,0]:INFO:root:Epoch[95] Speed: 3185.28 samples/sec [1,0]:INFO:root:Epoch[96] Batch[100] Loss[3.084] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[100] rmse=0.021150 lr=0.294469 [1,0]:INFO:root:Epoch[96] Batch[200] Loss[2.669] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[200] rmse=0.021011 lr=0.294302 [1,0]:INFO:root:Epoch[96] Batch[300] Loss[3.207] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[300] rmse=0.021060 lr=0.294135 [1,0]:INFO:root:Epoch[96] Batch[400] Loss[2.556] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[400] rmse=0.021059 lr=0.293968 [1,0]:INFO:root:Epoch[96] Batch[500] Loss[3.080] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[500] rmse=0.021052 lr=0.293800 [1,0]:INFO:root:Epoch[96] Batch[600] Loss[2.804] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[600] rmse=0.021035 lr=0.293633 [1,0]:INFO:root:Epoch[96] Batch[700] Loss[2.929] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[700] rmse=0.021046 lr=0.293466 [1,0]:INFO:root:Epoch[96] Batch[800] Loss[3.270] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[800] rmse=0.021050 lr=0.293298 [1,0]:INFO:root:Epoch[96] Batch[900] Loss[5.085] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[900] rmse=0.021050 lr=0.293131 [1,0]:INFO:root:Epoch[96] Batch[1000] Loss[4.671] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[1000] rmse=0.021051 lr=0.292963 [1,0]:INFO:root:Epoch[96] Batch[1100] Loss[2.905] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[1100] rmse=0.021056 lr=0.292795 [1,0]:INFO:root:Epoch[96] Batch[1200] Loss[2.945] [1,0]:INFO:root:Epoch[96] Rank[0] Batch[1200] rmse=0.021064 lr=0.292627 [1,0]:INFO:root:Epoch[96] Rank[0] Batch[1251] Time cost=403.18 Train-metric=0.021070 [1,0]:INFO:root:Epoch[96] Speed: 3177.31 samples/sec [1,0]:INFO:root:Epoch[97] Batch[100] Loss[4.244] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[100] rmse=0.020851 lr=0.292374 [1,0]:INFO:root:Epoch[97] Batch[200] Loss[2.789] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[200] rmse=0.020953 lr=0.292206 [1,0]:INFO:root:Epoch[97] Batch[300] Loss[4.082] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[300] rmse=0.020928 lr=0.292038 [1,0]:INFO:root:Epoch[97] Batch[400] Loss[3.284] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[400] rmse=0.020955 lr=0.291870 [1,0]:INFO:root:Epoch[97] Batch[500] Loss[4.269] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[500] rmse=0.020956 lr=0.291701 [1,0]:INFO:root:Epoch[97] Batch[600] Loss[4.670] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[600] rmse=0.020965 lr=0.291533 [1,0]:INFO:root:Epoch[97] Batch[700] Loss[4.368] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[700] rmse=0.021020 lr=0.291365 [1,0]:INFO:root:Epoch[97] Batch[800] Loss[2.771] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[800] rmse=0.021025 lr=0.291196 [1,0]:INFO:root:Epoch[97] Batch[900] Loss[2.747] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[900] rmse=0.021022 lr=0.291028 [1,0]:INFO:root:Epoch[97] Batch[1000] Loss[4.692] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[1000] rmse=0.021026 lr=0.290859 [1,0]:INFO:root:Epoch[97] Batch[1100] Loss[3.475] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[1100] rmse=0.021029 lr=0.290690 [1,0]:INFO:root:Epoch[97] Batch[1200] Loss[2.836] [1,0]:INFO:root:Epoch[97] Rank[0] Batch[1200] rmse=0.021025 lr=0.290521 [1,0]:INFO:root:Epoch[97] Rank[0] Batch[1251] Time cost=399.52 Train-metric=0.021020 [1,0]:INFO:root:Epoch[97] Speed: 3206.39 samples/sec [1,0]:INFO:root:Epoch[98] Batch[100] Loss[4.449] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[100] rmse=0.020840 lr=0.290266 [1,0]:INFO:root:Epoch[98] Batch[200] Loss[3.144] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[200] rmse=0.020870 lr=0.290097 [1,0]:INFO:root:Epoch[98] Batch[300] Loss[4.637] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[300] rmse=0.020904 lr=0.289928 [1,0]:INFO:root:Epoch[98] Batch[400] Loss[2.930] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[400] rmse=0.020956 lr=0.289759 [1,0]:INFO:root:Epoch[98] Batch[500] Loss[3.002] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[500] rmse=0.020987 lr=0.289590 [1,0]:INFO:root:Epoch[98] Batch[600] Loss[4.981] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[600] rmse=0.020920 lr=0.289420 [1,0]:INFO:root:Epoch[98] Batch[700] Loss[2.995] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[700] rmse=0.020944 lr=0.289251 [1,0]:INFO:root:Epoch[98] Batch[800] Loss[2.990] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[800] rmse=0.020951 lr=0.289081 [1,0]:INFO:root:Epoch[98] Batch[900] Loss[3.081] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[900] rmse=0.020945 lr=0.288912 [1,0]:INFO:root:Epoch[98] Batch[1000] Loss[3.144] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[1000] rmse=0.020959 lr=0.288742 [1,0]:INFO:root:Epoch[98] Batch[1100] Loss[4.498] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[1100] rmse=0.020943 lr=0.288572 [1,0]:INFO:root:Epoch[98] Batch[1200] Loss[3.273] [1,0]:INFO:root:Epoch[98] Rank[0] Batch[1200] rmse=0.020949 lr=0.288402 [1,0]:INFO:root:Epoch[98] Rank[0] Batch[1251] Time cost=399.16 Train-metric=0.020960 [1,0]:INFO:root:Epoch[98] Speed: 3209.32 samples/sec [1,0]:INFO:root:Epoch[99] Batch[100] Loss[3.178] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[100] rmse=0.021020 lr=0.288146 [1,0]:INFO:root:Epoch[99] Batch[200] Loss[3.648] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[200] rmse=0.020929 lr=0.287976 [1,0]:INFO:root:Epoch[99] Batch[300] Loss[3.922] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[300] rmse=0.020977 lr=0.287806 [1,0]:INFO:root:Epoch[99] Batch[400] Loss[3.536] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[400] rmse=0.020973 lr=0.287635 [1,0]:INFO:root:Epoch[99] Batch[500] Loss[3.056] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[500] rmse=0.020948 lr=0.287465 [1,0]:INFO:root:Epoch[99] Batch[600] Loss[3.231] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[600] rmse=0.020975 lr=0.287295 [1,0]:INFO:root:Epoch[99] Batch[700] Loss[3.732] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[700] rmse=0.021010 lr=0.287124 [1,0]:INFO:root:Epoch[99] Batch[800] Loss[3.444] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[800] rmse=0.021002 lr=0.286954 [1,0]:INFO:root:Epoch[99] Batch[900] Loss[2.791] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[900] rmse=0.020971 lr=0.286783 [1,0]:INFO:root:Epoch[99] Batch[1000] Loss[2.778] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[1000] rmse=0.020957 lr=0.286613 [1,0]:INFO:root:Epoch[99] Batch[1100] Loss[2.823] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[1100] rmse=0.020964 lr=0.286442 [1,0]:INFO:root:Epoch[99] Batch[1200] Loss[2.649] [1,0]:INFO:root:Epoch[99] Rank[0] Batch[1200] rmse=0.020971 lr=0.286271 [1,0]:INFO:root:Epoch[99] Rank[0] Batch[1251] Time cost=399.53 Train-metric=0.020990 [1,0]:INFO:root:Epoch[99] Speed: 3206.36 samples/sec [1,0]:INFO:root:Epoch[99] Rank[0] Validation-accuracy=0.596420 Validation-top_k_accuracy_5=0.831900 [1,0]:INFO:root:Epoch[100] Batch[100] Loss[2.886] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[100] rmse=0.020695 lr=0.286013 [1,0]:INFO:root:Epoch[100] Batch[200] Loss[5.084] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[200] rmse=0.020820 lr=0.285842 [1,0]:INFO:root:Epoch[100] Batch[300] Loss[2.883] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[300] rmse=0.020855 lr=0.285671 [1,0]:INFO:root:Epoch[100] Batch[400] Loss[4.013] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[400] rmse=0.020888 lr=0.285500 [1,0]:INFO:root:Epoch[100] Batch[500] Loss[3.710] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[500] rmse=0.020904 lr=0.285329 [1,0]:INFO:root:Epoch[100] Batch[600] Loss[5.536] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[600] rmse=0.020917 lr=0.285157 [1,0]:INFO:root:Epoch[100] Batch[700] Loss[2.918] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[700] rmse=0.020951 lr=0.284986 [1,0]:INFO:root:Epoch[100] Batch[800] Loss[2.845] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[800] rmse=0.020945 lr=0.284814 [1,0]:INFO:root:Epoch[100] Batch[900] Loss[2.910] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[900] rmse=0.020947 lr=0.284643 [1,0]:INFO:root:Epoch[100] Batch[1000] Loss[3.176] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[1000] rmse=0.020962 lr=0.284471 [1,0]:INFO:root:Epoch[100] Batch[1100] Loss[3.299] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[1100] rmse=0.020975 lr=0.284300 [1,0]:INFO:root:Epoch[100] Batch[1200] Loss[4.983] [1,0]:INFO:root:Epoch[100] Rank[0] Batch[1200] rmse=0.020991 lr=0.284128 [1,0]:INFO:root:Epoch[100] Rank[0] Batch[1251] Time cost=398.57 Train-metric=0.021000 [1,0]:INFO:root:Epoch[100] Speed: 3214.05 samples/sec [1,0]:INFO:root:Epoch[101] Batch[100] Loss[2.880] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[100] rmse=0.021006 lr=0.283868 [1,0]:INFO:root:Epoch[101] Batch[200] Loss[4.172] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[200] rmse=0.020976 lr=0.283696 [1,0]:INFO:root:Epoch[101] Batch[300] Loss[3.504] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[300] rmse=0.020961 lr=0.283524 [1,0]:INFO:root:Epoch[101] Batch[400] Loss[3.017] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[400] rmse=0.020953 lr=0.283352 [1,0]:INFO:root:Epoch[101] Batch[500] Loss[2.911] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[500] rmse=0.020976 lr=0.283180 [1,0]:INFO:root:Epoch[101] Batch[600] Loss[2.971] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[600] rmse=0.020963 lr=0.283008 [1,0]:INFO:root:Epoch[101] Batch[700] Loss[2.657] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[700] rmse=0.020997 lr=0.282835 [1,0]:INFO:root:Epoch[101] Batch[800] Loss[3.145] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[800] rmse=0.020976 lr=0.282663 [1,0]:INFO:root:Epoch[101] Batch[900] Loss[5.344] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[900] rmse=0.020969 lr=0.282490 [1,0]:INFO:root:Epoch[101] Batch[1000] Loss[3.438] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[1000] rmse=0.020994 lr=0.282318 [1,0]:INFO:root:Epoch[101] Batch[1100] Loss[3.078] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[1100] rmse=0.021004 lr=0.282145 [1,0]:INFO:root:Epoch[101] Batch[1200] Loss[2.899] [1,0]:INFO:root:Epoch[101] Rank[0] Batch[1200] rmse=0.020997 lr=0.281973 [1,0]:INFO:root:Epoch[101] Rank[0] Batch[1251] Time cost=399.42 Train-metric=0.021008 [1,0]:INFO:root:Epoch[101] Speed: 3207.21 samples/sec [1,0]:INFO:root:Epoch[102] Batch[100] Loss[2.841] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[100] rmse=0.020832 lr=0.281712 [1,0]:INFO:root:Epoch[102] Batch[200] Loss[2.723] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[200] rmse=0.020812 lr=0.281539 [1,0]:INFO:root:Epoch[102] Batch[300] Loss[2.819] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[300] rmse=0.020830 lr=0.281366 [1,0]:INFO:root:Epoch[102] Batch[400] Loss[3.402] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[400] rmse=0.020814 lr=0.281193 [1,0]:INFO:root:Epoch[102] Batch[500] Loss[5.270] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[500] rmse=0.020861 lr=0.281020 [1,0]:INFO:root:Epoch[102] Batch[600] Loss[5.123] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[600] rmse=0.020886 lr=0.280846 [1,0]:INFO:root:Epoch[102] Batch[700] Loss[3.029] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[700] rmse=0.020922 lr=0.280673 [1,0]:INFO:root:Epoch[102] Batch[800] Loss[5.052] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[800] rmse=0.020913 lr=0.280500 [1,0]:INFO:root:Epoch[102] Batch[900] Loss[3.040] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[900] rmse=0.020919 lr=0.280326 [1,0]:INFO:root:Epoch[102] Batch[1000] Loss[4.214] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[1000] rmse=0.020926 lr=0.280153 [1,0]:INFO:root:Epoch[102] Batch[1100] Loss[3.164] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[1100] rmse=0.020925 lr=0.279979 [1,0]:INFO:root:Epoch[102] Batch[1200] Loss[3.626] [1,0]:INFO:root:Epoch[102] Rank[0] Batch[1200] rmse=0.020941 lr=0.279806 [1,0]:INFO:root:Epoch[102] Rank[0] Batch[1251] Time cost=399.27 Train-metric=0.020940 [1,0]:INFO:root:Epoch[102] Speed: 3208.41 samples/sec [1,0]:INFO:root:Epoch[103] Batch[100] Loss[3.356] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[100] rmse=0.020750 lr=0.279544 [1,0]:INFO:root:Epoch[103] Batch[200] Loss[3.771] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[200] rmse=0.020871 lr=0.279370 [1,0]:INFO:root:Epoch[103] Batch[300] Loss[5.503] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[300] rmse=0.020862 lr=0.279196 [1,0]:INFO:root:Epoch[103] Batch[400] Loss[2.885] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[400] rmse=0.020905 lr=0.279022 [1,0]:INFO:root:Epoch[103] Batch[500] Loss[3.020] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[500] rmse=0.020967 lr=0.278848 [1,0]:INFO:root:Epoch[103] Batch[600] Loss[2.981] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[600] rmse=0.020948 lr=0.278674 [1,0]:INFO:root:Epoch[103] Batch[700] Loss[4.708] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[700] rmse=0.020955 lr=0.278500 [1,0]:INFO:root:Epoch[103] Batch[800] Loss[3.072] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[800] rmse=0.020963 lr=0.278326 [1,0]:INFO:root:Epoch[103] Batch[900] Loss[2.666] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[900] rmse=0.020981 lr=0.278151 [1,0]:INFO:root:Epoch[103] Batch[1000] Loss[3.237] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[1000] rmse=0.020968 lr=0.277977 [1,0]:INFO:root:Epoch[103] Batch[1100] Loss[2.737] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[1100] rmse=0.020961 lr=0.277802 [1,0]:INFO:root:Epoch[103] Batch[1200] Loss[5.054] [1,0]:INFO:root:Epoch[103] Rank[0] Batch[1200] rmse=0.020961 lr=0.277628 [1,0]:INFO:root:Epoch[103] Rank[0] Batch[1251] Time cost=399.03 Train-metric=0.020969 [1,0]:INFO:root:Epoch[103] Speed: 3210.32 samples/sec [1,0]:INFO:root:Epoch[104] Batch[100] Loss[2.758] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[100] rmse=0.020884 lr=0.277364 [1,0]:INFO:root:Epoch[104] Batch[200] Loss[4.810] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[200] rmse=0.020908 lr=0.277190 [1,0]:INFO:root:Epoch[104] Batch[300] Loss[2.968] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[300] rmse=0.020816 lr=0.277015 [1,0]:INFO:root:Epoch[104] Batch[400] Loss[4.690] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[400] rmse=0.020827 lr=0.276840 [1,0]:INFO:root:Epoch[104] Batch[500] Loss[3.161] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[500] rmse=0.020838 lr=0.276665 [1,0]:INFO:root:Epoch[104] Batch[600] Loss[2.757] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[600] rmse=0.020863 lr=0.276490 [1,0]:INFO:root:Epoch[104] Batch[700] Loss[2.974] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[700] rmse=0.020912 lr=0.276315 [1,0]:INFO:root:Epoch[104] Batch[800] Loss[3.143] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[800] rmse=0.020935 lr=0.276140 [1,0]:INFO:root:Epoch[104] Batch[900] Loss[4.622] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[900] rmse=0.020952 lr=0.275965 [1,0]:INFO:root:Epoch[104] Batch[1000] Loss[2.995] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[1000] rmse=0.020966 lr=0.275790 [1,0]:INFO:root:Epoch[104] Batch[1100] Loss[3.637] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[1100] rmse=0.020974 lr=0.275615 [1,0]:INFO:root:Epoch[104] Batch[1200] Loss[3.114] [1,0]:INFO:root:Epoch[104] Rank[0] Batch[1200] rmse=0.020981 lr=0.275439 [1,0]:INFO:root:Epoch[104] Rank[0] Batch[1251] Time cost=399.46 Train-metric=0.020978 [1,0]:INFO:root:Epoch[104] Speed: 3206.88 samples/sec [1,0]:INFO:root:Epoch[104] Rank[0] Validation-accuracy=0.607660 Validation-top_k_accuracy_5=0.837200 [1,0]:INFO:root:Epoch[105] Batch[100] Loss[2.737] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[100] rmse=0.020970 lr=0.275174 [1,0]:INFO:root:Epoch[105] Batch[200] Loss[5.301] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[200] rmse=0.020889 lr=0.274999 [1,0]:INFO:root:Epoch[105] Batch[300] Loss[2.895] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[300] rmse=0.020918 lr=0.274823 [1,0]:INFO:root:Epoch[105] Batch[400] Loss[2.651] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[400] rmse=0.020912 lr=0.274648 [1,0]:INFO:root:Epoch[105] Batch[500] Loss[2.999] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[500] rmse=0.020928 lr=0.274472 [1,0]:INFO:root:Epoch[105] Batch[600] Loss[2.941] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[600] rmse=0.020917 lr=0.274296 [1,0]:INFO:root:Epoch[105] Batch[700] Loss[5.005] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[700] rmse=0.020938 lr=0.274120 [1,0]:INFO:root:Epoch[105] Batch[800] Loss[3.179] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[800] rmse=0.020942 lr=0.273944 [1,0]:INFO:root:Epoch[105] Batch[900] Loss[2.827] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[900] rmse=0.020963 lr=0.273768 [1,0]:INFO:root:Epoch[105] Batch[1000] Loss[3.255] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[1000] rmse=0.020948 lr=0.273592 [1,0]:INFO:root:Epoch[105] Batch[1100] Loss[4.536] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[1100] rmse=0.020929 lr=0.273416 [1,0]:INFO:root:Epoch[105] Batch[1200] Loss[4.820] [1,0]:INFO:root:Epoch[105] Rank[0] Batch[1200] rmse=0.020910 lr=0.273240 [1,0]:INFO:root:Epoch[105] Rank[0] Batch[1251] Time cost=398.34 Train-metric=0.020915 [1,0]:INFO:root:Epoch[105] Speed: 3215.90 samples/sec [1,0]:INFO:root:Epoch[106] Batch[100] Loss[2.868] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[100] rmse=0.020839 lr=0.272974 [1,0]:INFO:root:Epoch[106] Batch[200] Loss[3.422] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[200] rmse=0.020855 lr=0.272797 [1,0]:INFO:root:Epoch[106] Batch[300] Loss[3.211] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[300] rmse=0.020928 lr=0.272621 [1,0]:INFO:root:Epoch[106] Batch[400] Loss[2.939] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[400] rmse=0.020935 lr=0.272444 [1,0]:INFO:root:Epoch[106] Batch[500] Loss[3.621] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[500] rmse=0.020931 lr=0.272268 [1,0]:INFO:root:Epoch[106] Batch[600] Loss[2.964] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[600] rmse=0.020929 lr=0.272091 [1,0]:INFO:root:Epoch[106] Batch[700] Loss[3.489] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[700] rmse=0.020912 lr=0.271915 [1,0]:INFO:root:Epoch[106] Batch[800] Loss[3.013] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[800] rmse=0.020917 lr=0.271738 [1,0]:INFO:root:Epoch[106] Batch[900] Loss[3.290] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[900] rmse=0.020888 lr=0.271561 [1,0]:INFO:root:Epoch[106] Batch[1000] Loss[3.129] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[1000] rmse=0.020896 lr=0.271384 [1,0]:INFO:root:Epoch[106] Batch[1100] Loss[2.896] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[1100] rmse=0.020886 lr=0.271207 [1,0]:INFO:root:Epoch[106] Batch[1200] Loss[4.302] [1,0]:INFO:root:Epoch[106] Rank[0] Batch[1200] rmse=0.020877 lr=0.271030 [1,0]:INFO:root:Epoch[106] Rank[0] Batch[1251] Time cost=399.39 Train-metric=0.020874 [1,0]:INFO:root:Epoch[106] Speed: 3207.42 samples/sec [1,0]:INFO:root:Epoch[107] Batch[100] Loss[4.217] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[100] rmse=0.020913 lr=0.270763 [1,0]:INFO:root:Epoch[107] Batch[200] Loss[4.126] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[200] rmse=0.020893 lr=0.270586 [1,0]:INFO:root:Epoch[107] Batch[300] Loss[4.230] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[300] rmse=0.020946 lr=0.270408 [1,0]:INFO:root:Epoch[107] Batch[400] Loss[2.770] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[400] rmse=0.020945 lr=0.270231 [1,0]:INFO:root:Epoch[107] Batch[500] Loss[3.030] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[500] rmse=0.020930 lr=0.270054 [1,0]:INFO:root:Epoch[107] Batch[600] Loss[2.834] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[600] rmse=0.020917 lr=0.269876 [1,0]:INFO:root:Epoch[107] Batch[700] Loss[2.855] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[700] rmse=0.020945 lr=0.269699 [1,0]:INFO:root:Epoch[107] Batch[800] Loss[2.800] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[800] rmse=0.020941 lr=0.269521 [1,0]:INFO:root:Epoch[107] Batch[900] Loss[3.170] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[900] rmse=0.020949 lr=0.269344 [1,0]:INFO:root:Epoch[107] Batch[1000] Loss[3.057] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[1000] rmse=0.020940 lr=0.269166 [1,0]:INFO:root:Epoch[107] Batch[1100] Loss[4.697] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[1100] rmse=0.020946 lr=0.268988 [1,0]:INFO:root:Epoch[107] Batch[1200] Loss[3.001] [1,0]:INFO:root:Epoch[107] Rank[0] Batch[1200] rmse=0.020948 lr=0.268811 [1,0]:INFO:root:Epoch[107] Rank[0] Batch[1251] Time cost=399.24 Train-metric=0.020949 [1,0]:INFO:root:Epoch[107] Speed: 3208.64 samples/sec [1,0]:INFO:root:Epoch[108] Batch[100] Loss[3.020] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[100] rmse=0.020682 lr=0.268542 [1,0]:INFO:root:Epoch[108] Batch[200] Loss[3.139] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[200] rmse=0.020704 lr=0.268364 [1,0]:INFO:root:Epoch[108] Batch[300] Loss[3.942] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[300] rmse=0.020676 lr=0.268186 [1,0]:INFO:root:Epoch[108] Batch[400] Loss[5.282] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[400] rmse=0.020768 lr=0.268008 [1,0]:INFO:root:Epoch[108] Batch[500] Loss[2.933] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[500] rmse=0.020809 lr=0.267830 [1,0]:INFO:root:Epoch[108] Batch[600] Loss[2.788] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[600] rmse=0.020781 lr=0.267652 [1,0]:INFO:root:Epoch[108] Batch[700] Loss[4.344] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[700] rmse=0.020782 lr=0.267473 [1,0]:INFO:root:Epoch[108] Batch[800] Loss[2.943] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[800] rmse=0.020788 lr=0.267295 [1,0]:INFO:root:Epoch[108] Batch[900] Loss[2.928] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[900] rmse=0.020801 lr=0.267117 [1,0]:INFO:root:Epoch[108] Batch[1000] Loss[4.360] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[1000] rmse=0.020817 lr=0.266938 [1,0]:INFO:root:Epoch[108] Batch[1100] Loss[2.971] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[1100] rmse=0.020826 lr=0.266760 [1,0]:INFO:root:Epoch[108] Batch[1200] Loss[3.347] [1,0]:INFO:root:Epoch[108] Rank[0] Batch[1200] rmse=0.020817 lr=0.266581 [1,0]:INFO:root:Epoch[108] Rank[0] Batch[1251] Time cost=399.25 Train-metric=0.020826 [1,0]:INFO:root:Epoch[108] Speed: 3208.59 samples/sec [1,0]:INFO:root:Epoch[109] Batch[100] Loss[2.965] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[100] rmse=0.020751 lr=0.266312 [1,0]:INFO:root:Epoch[109] Batch[200] Loss[3.710] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[200] rmse=0.020818 lr=0.266133 [1,0]:INFO:root:Epoch[109] Batch[300] Loss[5.219] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[300] rmse=0.020828 lr=0.265954 [1,0]:INFO:root:Epoch[109] Batch[400] Loss[3.075] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[400] rmse=0.020847 lr=0.265775 [1,0]:INFO:root:Epoch[109] Batch[500] Loss[2.708] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[500] rmse=0.020846 lr=0.265596 [1,0]:INFO:root:Epoch[109] Batch[600] Loss[2.738] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[600] rmse=0.020854 lr=0.265418 [1,0]:INFO:root:Epoch[109] Batch[700] Loss[3.198] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[700] rmse=0.020856 lr=0.265239 [1,0]:INFO:root:Epoch[109] Batch[800] Loss[2.911] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[800] rmse=0.020876 lr=0.265060 [1,0]:INFO:root:Epoch[109] Batch[900] Loss[3.237] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[900] rmse=0.020858 lr=0.264880 [1,0]:INFO:root:Epoch[109] Batch[1000] Loss[3.660] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[1000] rmse=0.020877 lr=0.264701 [1,0]:INFO:root:Epoch[109] Batch[1100] Loss[2.752] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[1100] rmse=0.020892 lr=0.264522 [1,0]:INFO:root:Epoch[109] Batch[1200] Loss[2.625] [1,0]:INFO:root:Epoch[109] Rank[0] Batch[1200] rmse=0.020899 lr=0.264343 [1,0]:INFO:root:Epoch[109] Rank[0] Batch[1251] Time cost=399.56 Train-metric=0.020897 [1,0]:INFO:root:Epoch[109] Speed: 3206.05 samples/sec [1,0]:INFO:root:Epoch[109] Rank[0] Validation-accuracy=0.618080 Validation-top_k_accuracy_5=0.847000 [1,0]:INFO:root:Epoch[110] Batch[100] Loss[2.565] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[100] rmse=0.020692 lr=0.264072 [1,0]:INFO:root:Epoch[110] Batch[200] Loss[3.428] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[200] rmse=0.020770 lr=0.263892 [1,0]:INFO:root:Epoch[110] Batch[300] Loss[4.750] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[300] rmse=0.020745 lr=0.263713 [1,0]:INFO:root:Epoch[110] Batch[400] Loss[2.616] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[400] rmse=0.020785 lr=0.263533 [1,0]:INFO:root:Epoch[110] Batch[500] Loss[3.704] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[500] rmse=0.020741 lr=0.263354 [1,0]:INFO:root:Epoch[110] Batch[600] Loss[4.241] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[600] rmse=0.020769 lr=0.263174 [1,0]:INFO:root:Epoch[110] Batch[700] Loss[2.704] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[700] rmse=0.020789 lr=0.262995 [1,0]:INFO:root:Epoch[110] Batch[800] Loss[2.825] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[800] rmse=0.020805 lr=0.262815 [1,0]:INFO:root:Epoch[110] Batch[900] Loss[3.651] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[900] rmse=0.020823 lr=0.262635 [1,0]:INFO:root:Epoch[110] Batch[1000] Loss[4.786] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[1000] rmse=0.020840 lr=0.262455 [1,0]:INFO:root:Epoch[110] Batch[1100] Loss[3.201] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[1100] rmse=0.020833 lr=0.262275 [1,0]:INFO:root:Epoch[110] Batch[1200] Loss[2.827] [1,0]:INFO:root:Epoch[110] Rank[0] Batch[1200] rmse=0.020825 lr=0.262095 [1,0]:INFO:root:Epoch[110] Rank[0] Batch[1251] Time cost=399.61 Train-metric=0.020833 [1,0]:INFO:root:Epoch[110] Speed: 3205.71 samples/sec [1,0]:INFO:root:Epoch[111] Batch[100] Loss[4.803] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[100] rmse=0.021003 lr=0.261823 [1,0]:INFO:root:Epoch[111] Batch[200] Loss[5.304] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[200] rmse=0.020892 lr=0.261643 [1,0]:INFO:root:Epoch[111] Batch[300] Loss[2.768] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[300] rmse=0.020908 lr=0.261463 [1,0]:INFO:root:Epoch[111] Batch[400] Loss[2.994] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[400] rmse=0.020852 lr=0.261283 [1,0]:INFO:root:Epoch[111] Batch[500] Loss[2.842] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[500] rmse=0.020849 lr=0.261102 [1,0]:INFO:root:Epoch[111] Batch[600] Loss[2.706] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[600] rmse=0.020830 lr=0.260922 [1,0]:INFO:root:Epoch[111] Batch[700] Loss[3.082] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[700] rmse=0.020860 lr=0.260742 [1,0]:INFO:root:Epoch[111] Batch[800] Loss[2.899] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[800] rmse=0.020853 lr=0.260561 [1,0]:INFO:root:Epoch[111] Batch[900] Loss[2.955] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[900] rmse=0.020873 lr=0.260381 [1,0]:INFO:root:Epoch[111] Batch[1000] Loss[2.910] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[1000] rmse=0.020897 lr=0.260200 [1,0]:INFO:root:Epoch[111] Batch[1100] Loss[3.670] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[1100] rmse=0.020904 lr=0.260019 [1,0]:INFO:root:Epoch[111] Batch[1200] Loss[4.618] [1,0]:INFO:root:Epoch[111] Rank[0] Batch[1200] rmse=0.020891 lr=0.259839 [1,0]:INFO:root:Epoch[111] Rank[0] Batch[1251] Time cost=398.96 Train-metric=0.020887 [1,0]:INFO:root:Epoch[111] Speed: 3210.89 samples/sec [1,0]:INFO:root:Epoch[112] Batch[100] Loss[3.929] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[100] rmse=0.020796 lr=0.259566 [1,0]:INFO:root:Epoch[112] Batch[200] Loss[3.736] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[200] rmse=0.020701 lr=0.259385 [1,0]:INFO:root:Epoch[112] Batch[300] Loss[3.272] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[300] rmse=0.020721 lr=0.259204 [1,0]:INFO:root:Epoch[112] Batch[400] Loss[4.531] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[400] rmse=0.020772 lr=0.259023 [1,0]:INFO:root:Epoch[112] Batch[500] Loss[2.746] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[500] rmse=0.020800 lr=0.258842 [1,0]:INFO:root:Epoch[112] Batch[600] Loss[3.205] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[600] rmse=0.020831 lr=0.258661 [1,0]:INFO:root:Epoch[112] Batch[700] Loss[4.996] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[700] rmse=0.020790 lr=0.258480 [1,0]:INFO:root:Epoch[112] Batch[800] Loss[2.911] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[800] rmse=0.020799 lr=0.258299 [1,0]:INFO:root:Epoch[112] Batch[900] Loss[3.100] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[900] rmse=0.020811 lr=0.258118 [1,0]:INFO:root:Epoch[112] Batch[1000] Loss[3.040] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[1000] rmse=0.020808 lr=0.257937 [1,0]:INFO:root:Epoch[112] Batch[1100] Loss[2.850] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[1100] rmse=0.020807 lr=0.257755 [1,0]:INFO:root:Epoch[112] Batch[1200] Loss[4.056] [1,0]:INFO:root:Epoch[112] Rank[0] Batch[1200] rmse=0.020809 lr=0.257574 [1,0]:INFO:root:Epoch[112] Rank[0] Batch[1251] Time cost=398.79 Train-metric=0.020810 [1,0]:INFO:root:Epoch[112] Speed: 3212.24 samples/sec [1,0]:INFO:root:Epoch[113] Batch[100] Loss[3.305] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[100] rmse=0.020723 lr=0.257300 [1,0]:INFO:root:Epoch[113] Batch[200] Loss[2.815] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[200] rmse=0.020729 lr=0.257119 [1,0]:INFO:root:Epoch[113] Batch[300] Loss[2.791] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[300] rmse=0.020782 lr=0.256937 [1,0]:INFO:root:Epoch[113] Batch[400] Loss[2.757] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[400] rmse=0.020805 lr=0.256756 [1,0]:INFO:root:Epoch[113] Batch[500] Loss[3.653] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[500] rmse=0.020835 lr=0.256574 [1,0]:INFO:root:Epoch[113] Batch[600] Loss[3.735] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[600] rmse=0.020843 lr=0.256392 [1,0]:INFO:root:Epoch[113] Batch[700] Loss[2.936] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[700] rmse=0.020849 lr=0.256211 [1,0]:INFO:root:Epoch[113] Batch[800] Loss[2.698] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[800] rmse=0.020841 lr=0.256029 [1,0]:INFO:root:Epoch[113] Batch[900] Loss[4.029] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[900] rmse=0.020828 lr=0.255847 [1,0]:INFO:root:Epoch[113] Batch[1000] Loss[4.879] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[1000] rmse=0.020814 lr=0.255665 [1,0]:INFO:root:Epoch[113] Batch[1100] Loss[2.891] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[1100] rmse=0.020821 lr=0.255483 [1,0]:INFO:root:Epoch[113] Batch[1200] Loss[2.945] [1,0]:INFO:root:Epoch[113] Rank[0] Batch[1200] rmse=0.020818 lr=0.255301 [1,0]:INFO:root:Epoch[113] Rank[0] Batch[1251] Time cost=398.74 Train-metric=0.020808 [1,0]:INFO:root:Epoch[113] Speed: 3212.70 samples/sec [1,0]:INFO:root:Epoch[114] Batch[100] Loss[2.558] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[100] rmse=0.020579 lr=0.255026 [1,0]:INFO:root:Epoch[114] Batch[200] Loss[2.705] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[200] rmse=0.020711 lr=0.254844 [1,0]:INFO:root:Epoch[114] Batch[300] Loss[3.305] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[300] rmse=0.020686 lr=0.254662 [1,0]:INFO:root:Epoch[114] Batch[400] Loss[4.736] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[400] rmse=0.020718 lr=0.254480 [1,0]:INFO:root:Epoch[114] Batch[500] Loss[3.198] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[500] rmse=0.020705 lr=0.254298 [1,0]:INFO:root:Epoch[114] Batch[600] Loss[5.161] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[600] rmse=0.020684 lr=0.254115 [1,0]:INFO:root:Epoch[114] Batch[700] Loss[4.087] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[700] rmse=0.020717 lr=0.253933 [1,0]:INFO:root:Epoch[114] Batch[800] Loss[5.397] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[800] rmse=0.020736 lr=0.253751 [1,0]:INFO:root:Epoch[114] Batch[900] Loss[2.981] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[900] rmse=0.020737 lr=0.253568 [1,0]:INFO:root:Epoch[114] Batch[1000] Loss[2.868] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[1000] rmse=0.020744 lr=0.253386 [1,0]:INFO:root:Epoch[114] Batch[1100] Loss[4.421] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[1100] rmse=0.020748 lr=0.253203 [1,0]:INFO:root:Epoch[114] Batch[1200] Loss[2.962] [1,0]:INFO:root:Epoch[114] Rank[0] Batch[1200] rmse=0.020745 lr=0.253021 [1,0]:INFO:root:Epoch[114] Rank[0] Batch[1251] Time cost=399.05 Train-metric=0.020742 [1,0]:INFO:root:Epoch[114] Speed: 3210.20 samples/sec [1,0]:INFO:root:Epoch[114] Rank[0] Validation-accuracy=0.615780 Validation-top_k_accuracy_5=0.845040 [1,0]:INFO:root:Epoch[115] Batch[100] Loss[2.896] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[100] rmse=0.020637 lr=0.252745 [1,0]:INFO:root:Epoch[115] Batch[200] Loss[2.534] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[200] rmse=0.020676 lr=0.252562 [1,0]:INFO:root:Epoch[115] Batch[300] Loss[2.667] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[300] rmse=0.020775 lr=0.252379 [1,0]:INFO:root:Epoch[115] Batch[400] Loss[3.105] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[400] rmse=0.020784 lr=0.252197 [1,0]:INFO:root:Epoch[115] Batch[500] Loss[3.191] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[500] rmse=0.020764 lr=0.252014 [1,0]:INFO:root:Epoch[115] Batch[600] Loss[2.726] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[600] rmse=0.020791 lr=0.251831 [1,0]:INFO:root:Epoch[115] Batch[700] Loss[4.576] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[700] rmse=0.020790 lr=0.251648 [1,0]:INFO:root:Epoch[115] Batch[800] Loss[2.931] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[800] rmse=0.020797 lr=0.251465 [1,0]:INFO:root:Epoch[115] Batch[900] Loss[3.381] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[900] rmse=0.020822 lr=0.251282 [1,0]:INFO:root:Epoch[115] Batch[1000] Loss[2.839] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[1000] rmse=0.020817 lr=0.251099 [1,0]:INFO:root:Epoch[115] Batch[1100] Loss[4.743] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[1100] rmse=0.020832 lr=0.250916 [1,0]:INFO:root:Epoch[115] Batch[1200] Loss[3.049] [1,0]:INFO:root:Epoch[115] Rank[0] Batch[1200] rmse=0.020836 lr=0.250733 [1,0]:INFO:root:Epoch[115] Rank[0] Batch[1251] Time cost=398.37 Train-metric=0.020836 [1,0]:INFO:root:Epoch[115] Speed: 3215.65 samples/sec [1,0]:INFO:root:Epoch[116] Batch[100] Loss[2.728] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[100] rmse=0.020723 lr=0.250456 [1,0]:INFO:root:Epoch[116] Batch[200] Loss[4.684] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[200] rmse=0.020723 lr=0.250273 [1,0]:INFO:root:Epoch[116] Batch[300] Loss[5.020] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[300] rmse=0.020709 lr=0.250089 [1,0]:INFO:root:Epoch[116] Batch[400] Loss[3.801] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[400] rmse=0.020749 lr=0.249906 [1,0]:INFO:root:Epoch[116] Batch[500] Loss[2.893] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[500] rmse=0.020762 lr=0.249723 [1,0]:INFO:root:Epoch[116] Batch[600] Loss[2.690] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[600] rmse=0.020769 lr=0.249539 [1,0]:INFO:root:Epoch[116] Batch[700] Loss[2.774] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[700] rmse=0.020755 lr=0.249356 [1,0]:INFO:root:Epoch[116] Batch[800] Loss[4.044] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[800] rmse=0.020756 lr=0.249172 [1,0]:INFO:root:Epoch[116] Batch[900] Loss[2.756] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[900] rmse=0.020770 lr=0.248988 [1,0]:INFO:root:Epoch[116] Batch[1000] Loss[4.817] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[1000] rmse=0.020770 lr=0.248805 [1,0]:INFO:root:Epoch[116] Batch[1100] Loss[2.858] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[1100] rmse=0.020798 lr=0.248621 [1,0]:INFO:root:Epoch[116] Batch[1200] Loss[5.266] [1,0]:INFO:root:Epoch[116] Rank[0] Batch[1200] rmse=0.020804 lr=0.248437 [1,0]:INFO:root:Epoch[116] Rank[0] Batch[1251] Time cost=399.82 Train-metric=0.020806 [1,0]:INFO:root:Epoch[116] Speed: 3204.00 samples/sec [1,0]:INFO:root:Epoch[117] Batch[100] Loss[2.884] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[100] rmse=0.020768 lr=0.248160 [1,0]:INFO:root:Epoch[117] Batch[200] Loss[4.880] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[200] rmse=0.020747 lr=0.247976 [1,0]:INFO:root:Epoch[117] Batch[300] Loss[3.722] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[300] rmse=0.020801 lr=0.247792 [1,0]:INFO:root:Epoch[117] Batch[400] Loss[2.711] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[400] rmse=0.020776 lr=0.247608 [1,0]:INFO:root:Epoch[117] Batch[500] Loss[3.696] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[500] rmse=0.020799 lr=0.247424 [1,0]:INFO:root:Epoch[117] Batch[600] Loss[3.060] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[600] rmse=0.020763 lr=0.247240 [1,0]:INFO:root:Epoch[117] Batch[700] Loss[4.458] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[700] rmse=0.020780 lr=0.247056 [1,0]:INFO:root:Epoch[117] Batch[800] Loss[5.156] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[800] rmse=0.020782 lr=0.246872 [1,0]:INFO:root:Epoch[117] Batch[900] Loss[2.957] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[900] rmse=0.020808 lr=0.246688 [1,0]:INFO:root:Epoch[117] Batch[1000] Loss[4.821] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[1000] rmse=0.020792 lr=0.246504 [1,0]:INFO:root:Epoch[117] Batch[1100] Loss[3.195] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[1100] rmse=0.020789 lr=0.246320 [1,0]:INFO:root:Epoch[117] Batch[1200] Loss[2.641] [1,0]:INFO:root:Epoch[117] Rank[0] Batch[1200] rmse=0.020803 lr=0.246136 [1,0]:INFO:root:Epoch[117] Rank[0] Batch[1251] Time cost=399.75 Train-metric=0.020820 [1,0]:INFO:root:Epoch[117] Speed: 3204.57 samples/sec [1,0]:INFO:root:Epoch[118] Batch[100] Loss[2.710] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[100] rmse=0.020881 lr=0.245857 [1,0]:INFO:root:Epoch[118] Batch[200] Loss[3.210] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[200] rmse=0.020797 lr=0.245673 [1,0]:INFO:root:Epoch[118] Batch[300] Loss[4.836] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[300] rmse=0.020731 lr=0.245488 [1,0]:INFO:root:Epoch[118] Batch[400] Loss[2.739] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[400] rmse=0.020721 lr=0.245304 [1,0]:INFO:root:Epoch[118] Batch[500] Loss[2.836] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[500] rmse=0.020731 lr=0.245120 [1,0]:INFO:root:Epoch[118] Batch[600] Loss[3.016] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[600] rmse=0.020758 lr=0.244935 [1,0]:INFO:root:Epoch[118] Batch[700] Loss[3.161] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[700] rmse=0.020749 lr=0.244751 [1,0]:INFO:root:Epoch[118] Batch[800] Loss[2.973] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[800] rmse=0.020741 lr=0.244566 [1,0]:INFO:root:Epoch[118] Batch[900] Loss[3.143] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[900] rmse=0.020765 lr=0.244381 [1,0]:INFO:root:Epoch[118] Batch[1000] Loss[3.102] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[1000] rmse=0.020764 lr=0.244197 [1,0]:INFO:root:Epoch[118] Batch[1100] Loss[2.981] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[1100] rmse=0.020785 lr=0.244012 [1,0]:INFO:root:Epoch[118] Batch[1200] Loss[4.288] [1,0]:INFO:root:Epoch[118] Rank[0] Batch[1200] rmse=0.020787 lr=0.243827 [1,0]:INFO:root:Epoch[118] Rank[0] Batch[1251] Time cost=398.96 Train-metric=0.020789 [1,0]:INFO:root:Epoch[118] Speed: 3210.88 samples/sec [1,0]:INFO:root:Epoch[119] Batch[100] Loss[2.782] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[100] rmse=0.020557 lr=0.243548 [1,0]:INFO:root:Epoch[119] Batch[200] Loss[2.671] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[200] rmse=0.020632 lr=0.243363 [1,0]:INFO:root:Epoch[119] Batch[300] Loss[3.082] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[300] rmse=0.020698 lr=0.243178 [1,0]:INFO:root:Epoch[119] Batch[400] Loss[5.082] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[400] rmse=0.020691 lr=0.242993 [1,0]:INFO:root:Epoch[119] Batch[500] Loss[3.179] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[500] rmse=0.020717 lr=0.242808 [1,0]:INFO:root:Epoch[119] Batch[600] Loss[2.786] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[600] rmse=0.020704 lr=0.242623 [1,0]:INFO:root:Epoch[119] Batch[700] Loss[2.836] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[700] rmse=0.020705 lr=0.242438 [1,0]:INFO:root:Epoch[119] Batch[800] Loss[2.692] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[800] rmse=0.020684 lr=0.242253 [1,0]:INFO:root:Epoch[119] Batch[900] Loss[2.704] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[900] rmse=0.020691 lr=0.242068 [1,0]:INFO:root:Epoch[119] Batch[1000] Loss[2.789] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[1000] rmse=0.020710 lr=0.241883 [1,0]:INFO:root:Epoch[119] Batch[1100] Loss[3.921] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[1100] rmse=0.020740 lr=0.241698 [1,0]:INFO:root:Epoch[119] Batch[1200] Loss[2.745] [1,0]:INFO:root:Epoch[119] Rank[0] Batch[1200] rmse=0.020741 lr=0.241513 [1,0]:INFO:root:Epoch[119] Rank[0] Batch[1251] Time cost=398.93 Train-metric=0.020749 [1,0]:INFO:root:Epoch[119] Speed: 3211.12 samples/sec [1,0]:INFO:root:Epoch[119] Rank[0] Validation-accuracy=0.625240 Validation-top_k_accuracy_5=0.851320 [1,0]:INFO:root:Epoch[120] Batch[100] Loss[3.525] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[100] rmse=0.020804 lr=0.241233 [1,0]:INFO:root:Epoch[120] Batch[200] Loss[3.359] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[200] rmse=0.020859 lr=0.241047 [1,0]:INFO:root:Epoch[120] Batch[300] Loss[5.126] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[300] rmse=0.020857 lr=0.240862 [1,0]:INFO:root:Epoch[120] Batch[400] Loss[3.322] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[400] rmse=0.020807 lr=0.240677 [1,0]:INFO:root:Epoch[120] Batch[500] Loss[4.615] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[500] rmse=0.020773 lr=0.240491 [1,0]:INFO:root:Epoch[120] Batch[600] Loss[2.887] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[600] rmse=0.020764 lr=0.240306 [1,0]:INFO:root:Epoch[120] Batch[700] Loss[5.239] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[700] rmse=0.020733 lr=0.240120 [1,0]:INFO:root:Epoch[120] Batch[800] Loss[2.835] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[800] rmse=0.020745 lr=0.239935 [1,0]:INFO:root:Epoch[120] Batch[900] Loss[3.168] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[900] rmse=0.020739 lr=0.239749 [1,0]:INFO:root:Epoch[120] Batch[1000] Loss[3.679] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[1000] rmse=0.020733 lr=0.239564 [1,0]:INFO:root:Epoch[120] Batch[1100] Loss[2.856] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[1100] rmse=0.020725 lr=0.239378 [1,0]:INFO:root:Epoch[120] Batch[1200] Loss[3.223] [1,0]:INFO:root:Epoch[120] Rank[0] Batch[1200] rmse=0.020727 lr=0.239192 [1,0]:INFO:root:Epoch[120] Rank[0] Batch[1251] Time cost=399.33 Train-metric=0.020726 [1,0]:INFO:root:Epoch[120] Speed: 3207.90 samples/sec [1,0]:INFO:root:Epoch[121] Batch[100] Loss[2.973] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[100] rmse=0.020737 lr=0.238912 [1,0]:INFO:root:Epoch[121] Batch[200] Loss[2.796] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[200] rmse=0.020774 lr=0.238726 [1,0]:INFO:root:Epoch[121] Batch[300] Loss[2.802] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[300] rmse=0.020705 lr=0.238540 [1,0]:INFO:root:Epoch[121] Batch[400] Loss[5.272] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[400] rmse=0.020665 lr=0.238354 [1,0]:INFO:root:Epoch[121] Batch[500] Loss[3.437] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[500] rmse=0.020655 lr=0.238168 [1,0]:INFO:root:Epoch[121] Batch[600] Loss[2.865] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[600] rmse=0.020644 lr=0.237982 [1,0]:INFO:root:Epoch[121] Batch[700] Loss[4.026] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[700] rmse=0.020641 lr=0.237797 [1,0]:INFO:root:Epoch[121] Batch[800] Loss[4.513] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[800] rmse=0.020677 lr=0.237611 [1,0]:INFO:root:Epoch[121] Batch[900] Loss[2.855] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[900] rmse=0.020686 lr=0.237425 [1,0]:INFO:root:Epoch[121] Batch[1000] Loss[3.302] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[1000] rmse=0.020684 lr=0.237238 [1,0]:INFO:root:Epoch[121] Batch[1100] Loss[2.821] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[1100] rmse=0.020685 lr=0.237052 [1,0]:INFO:root:Epoch[121] Batch[1200] Loss[5.108] [1,0]:INFO:root:Epoch[121] Rank[0] Batch[1200] rmse=0.020692 lr=0.236866 [1,0]:INFO:root:Epoch[121] Rank[0] Batch[1251] Time cost=402.32 Train-metric=0.020700 [1,0]:INFO:root:Epoch[121] Speed: 3184.12 samples/sec [1,0]:INFO:root:Epoch[122] Batch[100] Loss[2.950] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[100] rmse=0.020475 lr=0.236585 [1,0]:INFO:root:Epoch[122] Batch[200] Loss[2.562] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[200] rmse=0.020519 lr=0.236399 [1,0]:INFO:root:Epoch[122] Batch[300] Loss[2.711] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[300] rmse=0.020577 lr=0.236213 [1,0]:INFO:root:Epoch[122] Batch[400] Loss[2.927] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[400] rmse=0.020601 lr=0.236027 [1,0]:INFO:root:Epoch[122] Batch[500] Loss[2.771] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[500] rmse=0.020654 lr=0.235840 [1,0]:INFO:root:Epoch[122] Batch[600] Loss[2.768] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[600] rmse=0.020652 lr=0.235654 [1,0]:INFO:root:Epoch[122] Batch[700] Loss[5.108] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[700] rmse=0.020667 lr=0.235468 [1,0]:INFO:root:Epoch[122] Batch[800] Loss[2.812] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[800] rmse=0.020651 lr=0.235281 [1,0]:INFO:root:Epoch[122] Batch[900] Loss[2.712] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[900] rmse=0.020657 lr=0.235095 [1,0]:INFO:root:Epoch[122] Batch[1000] Loss[5.167] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[1000] rmse=0.020649 lr=0.234908 [1,0]:INFO:root:Epoch[122] Batch[1100] Loss[3.191] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[1100] rmse=0.020658 lr=0.234722 [1,0]:INFO:root:Epoch[122] Batch[1200] Loss[3.367] [1,0]:INFO:root:Epoch[122] Rank[0] Batch[1200] rmse=0.020673 lr=0.234535 [1,0]:INFO:root:Epoch[122] Rank[0] Batch[1251] Time cost=402.27 Train-metric=0.020667 [1,0]:INFO:root:Epoch[122] Speed: 3184.52 samples/sec [1,0]:INFO:root:Epoch[123] Batch[100] Loss[2.923] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[100] rmse=0.020532 lr=0.234254 [1,0]:INFO:root:Epoch[123] Batch[200] Loss[2.651] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[200] rmse=0.020560 lr=0.234067 [1,0]:INFO:root:Epoch[123] Batch[300] Loss[2.852] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[300] rmse=0.020538 lr=0.233880 [1,0]:INFO:root:Epoch[123] Batch[400] Loss[2.815] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[400] rmse=0.020601 lr=0.233694 [1,0]:INFO:root:Epoch[123] Batch[500] Loss[2.828] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[500] rmse=0.020624 lr=0.233507 [1,0]:INFO:root:Epoch[123] Batch[600] Loss[5.290] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[600] rmse=0.020605 lr=0.233320 [1,0]:INFO:root:Epoch[123] Batch[700] Loss[2.948] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[700] rmse=0.020618 lr=0.233134 [1,0]:INFO:root:Epoch[123] Batch[800] Loss[3.394] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[800] rmse=0.020611 lr=0.232947 [1,0]:INFO:root:Epoch[123] Batch[900] Loss[4.895] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[900] rmse=0.020652 lr=0.232760 [1,0]:INFO:root:Epoch[123] Batch[1000] Loss[4.918] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[1000] rmse=0.020675 lr=0.232573 [1,0]:INFO:root:Epoch[123] Batch[1100] Loss[4.947] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[1100] rmse=0.020662 lr=0.232386 [1,0]:INFO:root:Epoch[123] Batch[1200] Loss[3.793] [1,0]:INFO:root:Epoch[123] Rank[0] Batch[1200] rmse=0.020679 lr=0.232199 [1,0]:INFO:root:Epoch[123] Rank[0] Batch[1251] Time cost=399.56 Train-metric=0.020695 [1,0]:INFO:root:Epoch[123] Speed: 3206.12 samples/sec [1,0]:INFO:root:Epoch[124] Batch[100] Loss[3.993] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[100] rmse=0.020621 lr=0.231917 [1,0]:INFO:root:Epoch[124] Batch[200] Loss[2.720] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[200] rmse=0.020564 lr=0.231730 [1,0]:INFO:root:Epoch[124] Batch[300] Loss[2.915] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[300] rmse=0.020637 lr=0.231543 [1,0]:INFO:root:Epoch[124] Batch[400] Loss[2.749] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[400] rmse=0.020632 lr=0.231356 [1,0]:INFO:root:Epoch[124] Batch[500] Loss[2.815] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[500] rmse=0.020644 lr=0.231169 [1,0]:INFO:root:Epoch[124] Batch[600] Loss[2.857] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[600] rmse=0.020648 lr=0.230982 [1,0]:INFO:root:Epoch[124] Batch[700] Loss[3.067] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[700] rmse=0.020656 lr=0.230795 [1,0]:INFO:root:Epoch[124] Batch[800] Loss[3.161] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[800] rmse=0.020645 lr=0.230608 [1,0]:INFO:root:Epoch[124] Batch[900] Loss[3.416] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[900] rmse=0.020648 lr=0.230421 [1,0]:INFO:root:Epoch[124] Batch[1000] Loss[3.957] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[1000] rmse=0.020651 lr=0.230233 [1,0]:INFO:root:Epoch[124] Batch[1100] Loss[4.886] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[1100] rmse=0.020642 lr=0.230046 [1,0]:INFO:root:Epoch[124] Batch[1200] Loss[2.501] [1,0]:INFO:root:Epoch[124] Rank[0] Batch[1200] rmse=0.020661 lr=0.229859 [1,0]:INFO:root:Epoch[124] Rank[0] Batch[1251] Time cost=400.19 Train-metric=0.020665 [1,0]:INFO:root:Epoch[124] Speed: 3201.08 samples/sec [1,0]:INFO:root:Epoch[124] Rank[0] Validation-accuracy=0.621100 Validation-top_k_accuracy_5=0.848380 [1,0]:INFO:root:Epoch[125] Batch[100] Loss[2.947] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[100] rmse=0.020766 lr=0.229576 [1,0]:INFO:root:Epoch[125] Batch[200] Loss[3.553] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[200] rmse=0.020650 lr=0.229389 [1,0]:INFO:root:Epoch[125] Batch[300] Loss[3.032] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[300] rmse=0.020644 lr=0.229201 [1,0]:INFO:root:Epoch[125] Batch[400] Loss[3.044] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[400] rmse=0.020653 lr=0.229014 [1,0]:INFO:root:Epoch[125] Batch[500] Loss[3.154] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[500] rmse=0.020620 lr=0.228827 [1,0]:INFO:root:Epoch[125] Batch[600] Loss[4.434] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[600] rmse=0.020644 lr=0.228639 [1,0]:INFO:root:Epoch[125] Batch[700] Loss[2.596] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[700] rmse=0.020674 lr=0.228452 [1,0]:INFO:root:Epoch[125] Batch[800] Loss[4.445] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[800] rmse=0.020672 lr=0.228264 [1,0]:INFO:root:Epoch[125] Batch[900] Loss[3.486] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[900] rmse=0.020682 lr=0.228077 [1,0]:INFO:root:Epoch[125] Batch[1000] Loss[2.853] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[1000] rmse=0.020678 lr=0.227889 [1,0]:INFO:root:Epoch[125] Batch[1100] Loss[3.769] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[1100] rmse=0.020695 lr=0.227702 [1,0]:INFO:root:Epoch[125] Batch[1200] Loss[4.128] [1,0]:INFO:root:Epoch[125] Rank[0] Batch[1200] rmse=0.020700 lr=0.227514 [1,0]:INFO:root:Epoch[125] Rank[0] Batch[1251] Time cost=399.05 Train-metric=0.020696 [1,0]:INFO:root:Epoch[125] Speed: 3210.16 samples/sec [1,0]:INFO:root:Epoch[126] Batch[100] Loss[3.064] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[100] rmse=0.020212 lr=0.227231 [1,0]:INFO:root:Epoch[126] Batch[200] Loss[4.627] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[200] rmse=0.020406 lr=0.227043 [1,0]:INFO:root:Epoch[126] Batch[300] Loss[2.569] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[300] rmse=0.020445 lr=0.226856 [1,0]:INFO:root:Epoch[126] Batch[400] Loss[3.429] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[400] rmse=0.020471 lr=0.226668 [1,0]:INFO:root:Epoch[126] Batch[500] Loss[2.748] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[500] rmse=0.020519 lr=0.226480 [1,0]:INFO:root:Epoch[126] Batch[600] Loss[2.785] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[600] rmse=0.020539 lr=0.226293 [1,0]:INFO:root:Epoch[126] Batch[700] Loss[4.884] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[700] rmse=0.020544 lr=0.226105 [1,0]:INFO:root:Epoch[126] Batch[800] Loss[5.023] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[800] rmse=0.020553 lr=0.225917 [1,0]:INFO:root:Epoch[126] Batch[900] Loss[3.068] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[900] rmse=0.020587 lr=0.225729 [1,0]:INFO:root:Epoch[126] Batch[1000] Loss[5.411] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[1000] rmse=0.020620 lr=0.225542 [1,0]:INFO:root:Epoch[126] Batch[1100] Loss[4.314] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[1100] rmse=0.020632 lr=0.225354 [1,0]:INFO:root:Epoch[126] Batch[1200] Loss[5.297] [1,0]:INFO:root:Epoch[126] Rank[0] Batch[1200] rmse=0.020635 lr=0.225166 [1,0]:INFO:root:Epoch[126] Rank[0] Batch[1251] Time cost=398.59 Train-metric=0.020641 [1,0]:INFO:root:Epoch[126] Speed: 3213.91 samples/sec [1,0]:INFO:root:Epoch[127] Batch[100] Loss[2.885] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[100] rmse=0.020411 lr=0.224882 [1,0]:INFO:root:Epoch[127] Batch[200] Loss[3.019] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[200] rmse=0.020532 lr=0.224694 [1,0]:INFO:root:Epoch[127] Batch[300] Loss[4.956] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[300] rmse=0.020539 lr=0.224506 [1,0]:INFO:root:Epoch[127] Batch[400] Loss[3.798] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[400] rmse=0.020592 lr=0.224318 [1,0]:INFO:root:Epoch[127] Batch[500] Loss[4.537] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[500] rmse=0.020569 lr=0.224130 [1,0]:INFO:root:Epoch[127] Batch[600] Loss[4.720] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[600] rmse=0.020574 lr=0.223942 [1,0]:INFO:root:Epoch[127] Batch[700] Loss[3.216] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[700] rmse=0.020589 lr=0.223754 [1,0]:INFO:root:Epoch[127] Batch[800] Loss[3.120] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[800] rmse=0.020616 lr=0.223566 [1,0]:INFO:root:Epoch[127] Batch[900] Loss[2.716] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[900] rmse=0.020625 lr=0.223378 [1,0]:INFO:root:Epoch[127] Batch[1000] Loss[5.124] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[1000] rmse=0.020632 lr=0.223190 [1,0]:INFO:root:Epoch[127] Batch[1100] Loss[3.152] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[1100] rmse=0.020645 lr=0.223002 [1,0]:INFO:root:Epoch[127] Batch[1200] Loss[2.823] [1,0]:INFO:root:Epoch[127] Rank[0] Batch[1200] rmse=0.020643 lr=0.222814 [1,0]:INFO:root:Epoch[127] Rank[0] Batch[1251] Time cost=399.06 Train-metric=0.020642 [1,0]:INFO:root:Epoch[127] Speed: 3210.14 samples/sec [1,0]:INFO:root:Epoch[128] Batch[100] Loss[4.015] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[100] rmse=0.020733 lr=0.222530 [1,0]:INFO:root:Epoch[128] Batch[200] Loss[2.924] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[200] rmse=0.020638 lr=0.222342 [1,0]:INFO:root:Epoch[128] Batch[300] Loss[3.182] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[300] rmse=0.020578 lr=0.222153 [1,0]:INFO:root:Epoch[128] Batch[400] Loss[3.418] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[400] rmse=0.020628 lr=0.221965 [1,0]:INFO:root:Epoch[128] Batch[500] Loss[4.656] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[500] rmse=0.020553 lr=0.221777 [1,0]:INFO:root:Epoch[128] Batch[600] Loss[3.384] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[600] rmse=0.020524 lr=0.221589 [1,0]:INFO:root:Epoch[128] Batch[700] Loss[2.936] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[700] rmse=0.020548 lr=0.221400 [1,0]:INFO:root:Epoch[128] Batch[800] Loss[2.570] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[800] rmse=0.020564 lr=0.221212 [1,0]:INFO:root:Epoch[128] Batch[900] Loss[3.013] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[900] rmse=0.020580 lr=0.221024 [1,0]:INFO:root:Epoch[128] Batch[1000] Loss[3.570] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[1000] rmse=0.020602 lr=0.220835 [1,0]:INFO:root:Epoch[128] Batch[1100] Loss[2.744] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[1100] rmse=0.020605 lr=0.220647 [1,0]:INFO:root:Epoch[128] Batch[1200] Loss[2.803] [1,0]:INFO:root:Epoch[128] Rank[0] Batch[1200] rmse=0.020608 lr=0.220459 [1,0]:INFO:root:Epoch[128] Rank[0] Batch[1251] Time cost=398.84 Train-metric=0.020613 [1,0]:INFO:root:Epoch[128] Speed: 3211.84 samples/sec [1,0]:INFO:root:Epoch[129] Batch[100] Loss[5.083] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[100] rmse=0.020456 lr=0.220174 [1,0]:INFO:root:Epoch[129] Batch[200] Loss[3.125] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[200] rmse=0.020497 lr=0.219986 [1,0]:INFO:root:Epoch[129] Batch[300] Loss[4.845] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[300] rmse=0.020569 lr=0.219797 [1,0]:INFO:root:Epoch[129] Batch[400] Loss[2.807] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[400] rmse=0.020590 lr=0.219609 [1,0]:INFO:root:Epoch[129] Batch[500] Loss[3.176] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[500] rmse=0.020585 lr=0.219420 [1,0]:INFO:root:Epoch[129] Batch[600] Loss[3.009] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[600] rmse=0.020610 lr=0.219232 [1,0]:INFO:root:Epoch[129] Batch[700] Loss[2.891] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[700] rmse=0.020615 lr=0.219043 [1,0]:INFO:root:Epoch[129] Batch[800] Loss[3.220] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[800] rmse=0.020629 lr=0.218855 [1,0]:INFO:root:Epoch[129] Batch[900] Loss[3.927] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[900] rmse=0.020638 lr=0.218666 [1,0]:INFO:root:Epoch[129] Batch[1000] Loss[3.477] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[1000] rmse=0.020635 lr=0.218478 [1,0]:INFO:root:Epoch[129] Batch[1100] Loss[2.991] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[1100] rmse=0.020640 lr=0.218289 [1,0]:INFO:root:Epoch[129] Batch[1200] Loss[2.818] [1,0]:INFO:root:Epoch[129] Rank[0] Batch[1200] rmse=0.020633 lr=0.218101 [1,0]:INFO:root:Epoch[129] Rank[0] Batch[1251] Time cost=399.58 Train-metric=0.020636 [1,0]:INFO:root:Epoch[129] Speed: 3205.91 samples/sec [1,0]:INFO:root:Epoch[129] Rank[0] Validation-accuracy=0.626560 Validation-top_k_accuracy_5=0.855000 [1,0]:INFO:root:Epoch[130] Batch[100] Loss[2.752] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[100] rmse=0.020431 lr=0.217816 [1,0]:INFO:root:Epoch[130] Batch[200] Loss[4.392] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[200] rmse=0.020531 lr=0.217627 [1,0]:INFO:root:Epoch[130] Batch[300] Loss[4.735] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[300] rmse=0.020550 lr=0.217438 [1,0]:INFO:root:Epoch[130] Batch[400] Loss[2.998] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[400] rmse=0.020563 lr=0.217250 [1,0]:INFO:root:Epoch[130] Batch[500] Loss[3.088] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[500] rmse=0.020578 lr=0.217061 [1,0]:INFO:root:Epoch[130] Batch[600] Loss[2.999] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[600] rmse=0.020596 lr=0.216872 [1,0]:INFO:root:Epoch[130] Batch[700] Loss[2.308] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[700] rmse=0.020613 lr=0.216684 [1,0]:INFO:root:Epoch[130] Batch[800] Loss[3.819] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[800] rmse=0.020664 lr=0.216495 [1,0]:INFO:root:Epoch[130] Batch[900] Loss[4.128] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[900] rmse=0.020650 lr=0.216306 [1,0]:INFO:root:Epoch[130] Batch[1000] Loss[2.948] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[1000] rmse=0.020640 lr=0.216118 [1,0]:INFO:root:Epoch[130] Batch[1100] Loss[4.915] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[1100] rmse=0.020642 lr=0.215929 [1,0]:INFO:root:Epoch[130] Batch[1200] Loss[2.979] [1,0]:INFO:root:Epoch[130] Rank[0] Batch[1200] rmse=0.020650 lr=0.215740 [1,0]:INFO:root:Epoch[130] Rank[0] Batch[1251] Time cost=398.55 Train-metric=0.020646 [1,0]:INFO:root:Epoch[130] Speed: 3214.24 samples/sec [1,0]:INFO:root:Epoch[131] Batch[100] Loss[2.979] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[100] rmse=0.020655 lr=0.215455 [1,0]:INFO:root:Epoch[131] Batch[200] Loss[2.870] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[200] rmse=0.020476 lr=0.215266 [1,0]:INFO:root:Epoch[131] Batch[300] Loss[2.849] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[300] rmse=0.020570 lr=0.215077 [1,0]:INFO:root:Epoch[131] Batch[400] Loss[5.382] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[400] rmse=0.020543 lr=0.214888 [1,0]:INFO:root:Epoch[131] Batch[500] Loss[2.620] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[500] rmse=0.020536 lr=0.214700 [1,0]:INFO:root:Epoch[131] Batch[600] Loss[2.815] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[600] rmse=0.020531 lr=0.214511 [1,0]:INFO:root:Epoch[131] Batch[700] Loss[2.549] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[700] rmse=0.020530 lr=0.214322 [1,0]:INFO:root:Epoch[131] Batch[800] Loss[2.884] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[800] rmse=0.020533 lr=0.214133 [1,0]:INFO:root:Epoch[131] Batch[900] Loss[5.264] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[900] rmse=0.020551 lr=0.213944 [1,0]:INFO:root:Epoch[131] Batch[1000] Loss[4.917] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[1000] rmse=0.020562 lr=0.213755 [1,0]:INFO:root:Epoch[131] Batch[1100] Loss[3.244] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[1100] rmse=0.020552 lr=0.213566 [1,0]:INFO:root:Epoch[131] Batch[1200] Loss[3.049] [1,0]:INFO:root:Epoch[131] Rank[0] Batch[1200] rmse=0.020572 lr=0.213377 [1,0]:INFO:root:Epoch[131] Rank[0] Batch[1251] Time cost=398.97 Train-metric=0.020573 [1,0]:INFO:root:Epoch[131] Speed: 3210.85 samples/sec [1,0]:INFO:root:Epoch[132] Batch[100] Loss[4.977] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[100] rmse=0.020711 lr=0.213092 [1,0]:INFO:root:Epoch[132] Batch[200] Loss[4.915] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[200] rmse=0.020656 lr=0.212903 [1,0]:INFO:root:Epoch[132] Batch[300] Loss[3.104] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[300] rmse=0.020629 lr=0.212714 [1,0]:INFO:root:Epoch[132] Batch[400] Loss[3.623] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[400] rmse=0.020616 lr=0.212525 [1,0]:INFO:root:Epoch[132] Batch[500] Loss[3.995] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[500] rmse=0.020541 lr=0.212336 [1,0]:INFO:root:Epoch[132] Batch[600] Loss[2.541] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[600] rmse=0.020556 lr=0.212147 [1,0]:INFO:root:Epoch[132] Batch[700] Loss[2.840] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[700] rmse=0.020547 lr=0.211958 [1,0]:INFO:root:Epoch[132] Batch[800] Loss[4.673] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[800] rmse=0.020560 lr=0.211769 [1,0]:INFO:root:Epoch[132] Batch[900] Loss[2.954] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[900] rmse=0.020581 lr=0.211580 [1,0]:INFO:root:Epoch[132] Batch[1000] Loss[4.423] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[1000] rmse=0.020578 lr=0.211391 [1,0]:INFO:root:Epoch[132] Batch[1100] Loss[2.964] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[1100] rmse=0.020599 lr=0.211202 [1,0]:INFO:root:Epoch[132] Batch[1200] Loss[2.993] [1,0]:INFO:root:Epoch[132] Rank[0] Batch[1200] rmse=0.020608 lr=0.211012 [1,0]:INFO:root:Epoch[132] Rank[0] Batch[1251] Time cost=399.24 Train-metric=0.020609 [1,0]:INFO:root:Epoch[132] Speed: 3208.69 samples/sec [1,0]:INFO:root:Epoch[133] Batch[100] Loss[3.051] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[100] rmse=0.020667 lr=0.210727 [1,0]:INFO:root:Epoch[133] Batch[200] Loss[4.816] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[200] rmse=0.020556 lr=0.210538 [1,0]:INFO:root:Epoch[133] Batch[300] Loss[5.240] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[300] rmse=0.020536 lr=0.210349 [1,0]:INFO:root:Epoch[133] Batch[400] Loss[3.667] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[400] rmse=0.020587 lr=0.210160 [1,0]:INFO:root:Epoch[133] Batch[500] Loss[2.564] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[500] rmse=0.020576 lr=0.209970 [1,0]:INFO:root:Epoch[133] Batch[600] Loss[2.997] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[600] rmse=0.020563 lr=0.209781 [1,0]:INFO:root:Epoch[133] Batch[700] Loss[5.187] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[700] rmse=0.020567 lr=0.209592 [1,0]:INFO:root:Epoch[133] Batch[800] Loss[3.047] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[800] rmse=0.020582 lr=0.209403 [1,0]:INFO:root:Epoch[133] Batch[900] Loss[5.049] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[900] rmse=0.020558 lr=0.209214 [1,0]:INFO:root:Epoch[133] Batch[1000] Loss[3.129] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[1000] rmse=0.020548 lr=0.209025 [1,0]:INFO:root:Epoch[133] Batch[1100] Loss[2.732] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[1100] rmse=0.020567 lr=0.208835 [1,0]:INFO:root:Epoch[133] Batch[1200] Loss[4.389] [1,0]:INFO:root:Epoch[133] Rank[0] Batch[1200] rmse=0.020564 lr=0.208646 [1,0]:INFO:root:Epoch[133] Rank[0] Batch[1251] Time cost=399.61 Train-metric=0.020571 [1,0]:INFO:root:Epoch[133] Speed: 3205.66 samples/sec [1,0]:INFO:root:Epoch[134] Batch[100] Loss[5.255] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[100] rmse=0.020327 lr=0.208360 [1,0]:INFO:root:Epoch[134] Batch[200] Loss[2.776] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[200] rmse=0.020426 lr=0.208171 [1,0]:INFO:root:Epoch[134] Batch[300] Loss[2.604] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[300] rmse=0.020471 lr=0.207982 [1,0]:INFO:root:Epoch[134] Batch[400] Loss[3.835] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[400] rmse=0.020469 lr=0.207793 [1,0]:INFO:root:Epoch[134] Batch[500] Loss[5.063] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[500] rmse=0.020498 lr=0.207604 [1,0]:INFO:root:Epoch[134] Batch[600] Loss[3.410] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[600] rmse=0.020496 lr=0.207414 [1,0]:INFO:root:Epoch[134] Batch[700] Loss[2.710] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[700] rmse=0.020506 lr=0.207225 [1,0]:INFO:root:Epoch[134] Batch[800] Loss[2.623] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[800] rmse=0.020520 lr=0.207036 [1,0]:INFO:root:Epoch[134] Batch[900] Loss[2.856] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[900] rmse=0.020529 lr=0.206847 [1,0]:INFO:root:Epoch[134] Batch[1000] Loss[3.929] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[1000] rmse=0.020526 lr=0.206657 [1,0]:INFO:root:Epoch[134] Batch[1100] Loss[3.269] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[1100] rmse=0.020522 lr=0.206468 [1,0]:INFO:root:Epoch[134] Batch[1200] Loss[4.449] [1,0]:INFO:root:Epoch[134] Rank[0] Batch[1200] rmse=0.020534 lr=0.206279 [1,0]:INFO:root:Epoch[134] Rank[0] Batch[1251] Time cost=399.35 Train-metric=0.020532 [1,0]:INFO:root:Epoch[134] Speed: 3207.73 samples/sec [1,0]:INFO:root:Epoch[134] Rank[0] Validation-accuracy=0.633520 Validation-top_k_accuracy_5=0.855260 [1,0]:INFO:root:Epoch[135] Batch[100] Loss[2.670] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[100] rmse=0.020461 lr=0.205993 [1,0]:INFO:root:Epoch[135] Batch[200] Loss[2.979] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[200] rmse=0.020396 lr=0.205804 [1,0]:INFO:root:Epoch[135] Batch[300] Loss[2.865] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[300] rmse=0.020417 lr=0.205614 [1,0]:INFO:root:Epoch[135] Batch[400] Loss[2.918] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[400] rmse=0.020394 lr=0.205425 [1,0]:INFO:root:Epoch[135] Batch[500] Loss[2.808] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[500] rmse=0.020397 lr=0.205236 [1,0]:INFO:root:Epoch[135] Batch[600] Loss[2.772] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[600] rmse=0.020377 lr=0.205046 [1,0]:INFO:root:Epoch[135] Batch[700] Loss[2.565] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[700] rmse=0.020400 lr=0.204857 [1,0]:INFO:root:Epoch[135] Batch[800] Loss[3.879] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[800] rmse=0.020400 lr=0.204668 [1,0]:INFO:root:Epoch[135] Batch[900] Loss[5.385] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[900] rmse=0.020425 lr=0.204478 [1,0]:INFO:root:Epoch[135] Batch[1000] Loss[4.687] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[1000] rmse=0.020440 lr=0.204289 [1,0]:INFO:root:Epoch[135] Batch[1100] Loss[2.855] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[1100] rmse=0.020451 lr=0.204100 [1,0]:INFO:root:Epoch[135] Batch[1200] Loss[2.909] [1,0]:INFO:root:Epoch[135] Rank[0] Batch[1200] rmse=0.020472 lr=0.203910 [1,0]:INFO:root:Epoch[135] Rank[0] Batch[1251] Time cost=399.71 Train-metric=0.020481 [1,0]:INFO:root:Epoch[135] Speed: 3204.87 samples/sec [1,0]:INFO:root:Epoch[136] Batch[100] Loss[3.021] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[100] rmse=0.020312 lr=0.203624 [1,0]:INFO:root:Epoch[136] Batch[200] Loss[2.918] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[200] rmse=0.020279 lr=0.203435 [1,0]:INFO:root:Epoch[136] Batch[300] Loss[4.671] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[300] rmse=0.020321 lr=0.203246 [1,0]:INFO:root:Epoch[136] Batch[400] Loss[2.968] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[400] rmse=0.020411 lr=0.203056 [1,0]:INFO:root:Epoch[136] Batch[500] Loss[2.746] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[500] rmse=0.020443 lr=0.202867 [1,0]:INFO:root:Epoch[136] Batch[600] Loss[4.421] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[600] rmse=0.020453 lr=0.202678 [1,0]:INFO:root:Epoch[136] Batch[700] Loss[4.731] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[700] rmse=0.020489 lr=0.202488 [1,0]:INFO:root:Epoch[136] Batch[800] Loss[3.606] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[800] rmse=0.020497 lr=0.202299 [1,0]:INFO:root:Epoch[136] Batch[900] Loss[5.157] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[900] rmse=0.020527 lr=0.202110 [1,0]:INFO:root:Epoch[136] Batch[1000] Loss[5.063] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[1000] rmse=0.020532 lr=0.201920 [1,0]:INFO:root:Epoch[136] Batch[1100] Loss[3.088] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[1100] rmse=0.020538 lr=0.201731 [1,0]:INFO:root:Epoch[136] Batch[1200] Loss[3.298] [1,0]:INFO:root:Epoch[136] Rank[0] Batch[1200] rmse=0.020550 lr=0.201542 [1,0]:INFO:root:Epoch[136] Rank[0] Batch[1251] Time cost=399.11 Train-metric=0.020562 [1,0]:INFO:root:Epoch[136] Speed: 3209.70 samples/sec [1,0]:INFO:root:Epoch[137] Batch[100] Loss[5.215] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[100] rmse=0.020477 lr=0.201256 [1,0]:INFO:root:Epoch[137] Batch[200] Loss[3.150] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[200] rmse=0.020490 lr=0.201066 [1,0]:INFO:root:Epoch[137] Batch[300] Loss[3.194] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[300] rmse=0.020554 lr=0.200877 [1,0]:INFO:root:Epoch[137] Batch[400] Loss[3.180] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[400] rmse=0.020540 lr=0.200687 [1,0]:INFO:root:Epoch[137] Batch[500] Loss[3.499] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[500] rmse=0.020538 lr=0.200498 [1,0]:INFO:root:Epoch[137] Batch[600] Loss[3.063] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[600] rmse=0.020530 lr=0.200309 [1,0]:INFO:root:Epoch[137] Batch[700] Loss[2.600] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[700] rmse=0.020517 lr=0.200119 [1,0]:INFO:root:Epoch[137] Batch[800] Loss[3.072] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[800] rmse=0.020546 lr=0.199930 [1,0]:INFO:root:Epoch[137] Batch[900] Loss[3.025] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[900] rmse=0.020558 lr=0.199741 [1,0]:INFO:root:Epoch[137] Batch[1000] Loss[3.323] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[1000] rmse=0.020542 lr=0.199551 [1,0]:INFO:root:Epoch[137] Batch[1100] Loss[4.178] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[1100] rmse=0.020534 lr=0.199362 [1,0]:INFO:root:Epoch[137] Batch[1200] Loss[3.885] [1,0]:INFO:root:Epoch[137] Rank[0] Batch[1200] rmse=0.020540 lr=0.199172 [1,0]:INFO:root:Epoch[137] Rank[0] Batch[1251] Time cost=398.92 Train-metric=0.020546 [1,0]:INFO:root:Epoch[137] Speed: 3211.23 samples/sec [1,0]:INFO:root:Epoch[138] Batch[100] Loss[2.632] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[100] rmse=0.020267 lr=0.198886 [1,0]:INFO:root:Epoch[138] Batch[200] Loss[2.925] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[200] rmse=0.020418 lr=0.198697 [1,0]:INFO:root:Epoch[138] Batch[300] Loss[4.241] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[300] rmse=0.020504 lr=0.198508 [1,0]:INFO:root:Epoch[138] Batch[400] Loss[3.333] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[400] rmse=0.020486 lr=0.198318 [1,0]:INFO:root:Epoch[138] Batch[500] Loss[4.565] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[500] rmse=0.020492 lr=0.198129 [1,0]:INFO:root:Epoch[138] Batch[600] Loss[3.253] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[600] rmse=0.020487 lr=0.197940 [1,0]:INFO:root:Epoch[138] Batch[700] Loss[3.604] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[700] rmse=0.020491 lr=0.197750 [1,0]:INFO:root:Epoch[138] Batch[800] Loss[2.763] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[800] rmse=0.020478 lr=0.197561 [1,0]:INFO:root:Epoch[138] Batch[900] Loss[2.710] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[900] rmse=0.020493 lr=0.197372 [1,0]:INFO:root:Epoch[138] Batch[1000] Loss[4.049] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[1000] rmse=0.020481 lr=0.197182 [1,0]:INFO:root:Epoch[138] Batch[1100] Loss[3.139] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[1100] rmse=0.020495 lr=0.196993 [1,0]:INFO:root:Epoch[138] Batch[1200] Loss[5.176] [1,0]:INFO:root:Epoch[138] Rank[0] Batch[1200] rmse=0.020501 lr=0.196803 [1,0]:INFO:root:Epoch[138] Rank[0] Batch[1251] Time cost=398.74 Train-metric=0.020494 [1,0]:INFO:root:Epoch[138] Speed: 3212.66 samples/sec [1,0]:INFO:root:Epoch[139] Batch[100] Loss[3.681] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[100] rmse=0.020201 lr=0.196518 [1,0]:INFO:root:Epoch[139] Batch[200] Loss[2.600] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[200] rmse=0.020277 lr=0.196328 [1,0]:INFO:root:Epoch[139] Batch[300] Loss[2.930] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[300] rmse=0.020381 lr=0.196139 [1,0]:INFO:root:Epoch[139] Batch[400] Loss[2.912] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[400] rmse=0.020360 lr=0.195949 [1,4]:[ip-172-31-29-212][[55333,1],4][btl_tcp.c:559:mca_btl_tcp_recv_blocking] [1,4]:recv(116) failed: Connection reset by peer (104)[1,4]: [1,0]:INFO:root:Epoch[139] Batch[500] Loss[4.357] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[500] rmse=0.020349 lr=0.195760 [1,0]:INFO:root:Epoch[139] Batch[600] Loss[2.826] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[600] rmse=0.020347 lr=0.195571 [1,0]:INFO:root:Epoch[139] Batch[700] Loss[2.855] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[700] rmse=0.020355 lr=0.195381 [1,0]:INFO:root:Epoch[139] Batch[800] Loss[2.868] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[800] rmse=0.020371 lr=0.195192 [1,0]:INFO:root:Epoch[139] Batch[900] Loss[3.388] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[900] rmse=0.020381 lr=0.195003 [1,0]:INFO:root:Epoch[139] Batch[1000] Loss[2.868] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[1000] rmse=0.020422 lr=0.194814 [1,0]:INFO:root:Epoch[139] Batch[1100] Loss[3.929] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[1100] rmse=0.020428 lr=0.194624 [1,0]:INFO:root:Epoch[139] Batch[1200] Loss[3.846] [1,0]:INFO:root:Epoch[139] Rank[0] Batch[1200] rmse=0.020438 lr=0.194435 [1,0]:INFO:root:Epoch[139] Rank[0] Batch[1251] Time cost=399.53 Train-metric=0.020436 [1,0]:INFO:root:Epoch[139] Speed: 3206.35 samples/sec [1,0]:INFO:root:Epoch[139] Rank[0] Validation-accuracy=0.639240 Validation-top_k_accuracy_5=0.859100 [1,0]:INFO:root:Epoch[140] Batch[100] Loss[2.889] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[100] rmse=0.020193 lr=0.194149 [1,0]:INFO:root:Epoch[140] Batch[200] Loss[5.259] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[200] rmse=0.020281 lr=0.193960 [1,0]:INFO:root:Epoch[140] Batch[300] Loss[2.833] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[300] rmse=0.020322 lr=0.193770 [1,0]:INFO:root:Epoch[140] Batch[400] Loss[2.608] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[400] rmse=0.020352 lr=0.193581 [1,0]:INFO:root:Epoch[140] Batch[500] Loss[3.658] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[500] rmse=0.020394 lr=0.193392 [1,0]:INFO:root:Epoch[140] Batch[600] Loss[2.970] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[600] rmse=0.020395 lr=0.193203 [1,0]:INFO:root:Epoch[140] Batch[700] Loss[4.625] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[700] rmse=0.020407 lr=0.193013 [1,0]:INFO:root:Epoch[140] Batch[800] Loss[4.667] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[800] rmse=0.020434 lr=0.192824 [1,0]:INFO:root:Epoch[140] Batch[900] Loss[3.544] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[900] rmse=0.020436 lr=0.192635 [1,0]:INFO:root:Epoch[140] Batch[1000] Loss[2.840] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[1000] rmse=0.020440 lr=0.192446 [1,0]:INFO:root:Epoch[140] Batch[1100] Loss[3.120] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[1100] rmse=0.020426 lr=0.192256 [1,0]:INFO:root:Epoch[140] Batch[1200] Loss[2.979] [1,0]:INFO:root:Epoch[140] Rank[0] Batch[1200] rmse=0.020444 lr=0.192067 [1,0]:INFO:root:Epoch[140] Rank[0] Batch[1251] Time cost=397.16 Train-metric=0.020455 [1,0]:INFO:root:Epoch[140] Speed: 3225.42 samples/sec [1,0]:INFO:root:Epoch[141] Batch[100] Loss[5.103] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[100] rmse=0.020342 lr=0.191781 [1,0]:INFO:root:Epoch[141] Batch[200] Loss[2.793] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[200] rmse=0.020334 lr=0.191592 [1,0]:INFO:root:Epoch[141] Batch[300] Loss[4.846] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[300] rmse=0.020366 lr=0.191403 [1,0]:INFO:root:Epoch[141] Batch[400] Loss[2.586] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[400] rmse=0.020359 lr=0.191214 [1,0]:INFO:root:Epoch[141] Batch[500] Loss[2.775] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[500] rmse=0.020385 lr=0.191025 [1,0]:INFO:root:Epoch[141] Batch[600] Loss[4.597] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[600] rmse=0.020405 lr=0.190835 [1,0]:INFO:root:Epoch[141] Batch[700] Loss[4.510] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[700] rmse=0.020398 lr=0.190646 [1,0]:INFO:root:Epoch[141] Batch[800] Loss[4.167] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[800] rmse=0.020400 lr=0.190457 [1,0]:INFO:root:Epoch[141] Batch[900] Loss[2.911] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[900] rmse=0.020405 lr=0.190268 [1,0]:INFO:root:Epoch[141] Batch[1000] Loss[4.285] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[1000] rmse=0.020388 lr=0.190079 [1,0]:INFO:root:Epoch[141] Batch[1100] Loss[4.014] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[1100] rmse=0.020392 lr=0.189890 [1,0]:INFO:root:Epoch[141] Batch[1200] Loss[4.957] [1,0]:INFO:root:Epoch[141] Rank[0] Batch[1200] rmse=0.020400 lr=0.189700 [1,0]:INFO:root:Epoch[141] Rank[0] Batch[1251] Time cost=399.36 Train-metric=0.020412 [1,0]:INFO:root:Epoch[141] Speed: 3207.65 samples/sec [1,0]:INFO:root:Epoch[142] Batch[100] Loss[2.645] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[100] rmse=0.020344 lr=0.189415 [1,0]:INFO:root:Epoch[142] Batch[200] Loss[3.564] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[200] rmse=0.020392 lr=0.189226 [1,0]:INFO:root:Epoch[142] Batch[300] Loss[5.010] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[300] rmse=0.020443 lr=0.189037 [1,0]:INFO:root:Epoch[142] Batch[400] Loss[2.501] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[400] rmse=0.020423 lr=0.188848 [1,0]:INFO:root:Epoch[142] Batch[500] Loss[3.339] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[500] rmse=0.020424 lr=0.188659 [1,0]:INFO:root:Epoch[142] Batch[600] Loss[2.999] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[600] rmse=0.020414 lr=0.188469 [1,0]:INFO:root:Epoch[142] Batch[700] Loss[2.715] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[700] rmse=0.020420 lr=0.188280 [1,0]:INFO:root:Epoch[142] Batch[800] Loss[4.537] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[800] rmse=0.020437 lr=0.188091 [1,0]:INFO:root:Epoch[142] Batch[900] Loss[2.697] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[900] rmse=0.020435 lr=0.187902 [1,0]:INFO:root:Epoch[142] Batch[1000] Loss[4.291] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[1000] rmse=0.020435 lr=0.187713 [1,0]:INFO:root:Epoch[142] Batch[1100] Loss[4.882] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[1100] rmse=0.020435 lr=0.187524 [1,0]:INFO:root:Epoch[142] Batch[1200] Loss[2.440] [1,0]:INFO:root:Epoch[142] Rank[0] Batch[1200] rmse=0.020444 lr=0.187335 [1,0]:INFO:root:Epoch[142] Rank[0] Batch[1251] Time cost=399.43 Train-metric=0.020452 [1,0]:INFO:root:Epoch[142] Speed: 3207.15 samples/sec [1,0]:INFO:root:Epoch[143] Batch[100] Loss[3.846] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[100] rmse=0.020147 lr=0.187050 [1,0]:INFO:root:Epoch[143] Batch[200] Loss[3.235] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[200] rmse=0.020257 lr=0.186861 [1,0]:INFO:root:Epoch[143] Batch[300] Loss[2.778] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[300] rmse=0.020342 lr=0.186672 [1,0]:INFO:root:Epoch[143] Batch[400] Loss[2.635] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[400] rmse=0.020379 lr=0.186483 [1,0]:INFO:root:Epoch[143] Batch[500] Loss[2.792] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[500] rmse=0.020429 lr=0.186294 [1,0]:INFO:root:Epoch[143] Batch[600] Loss[2.742] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[600] rmse=0.020434 lr=0.186105 [1,0]:INFO:root:Epoch[143] Batch[700] Loss[4.504] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[700] rmse=0.020429 lr=0.185916 [1,0]:INFO:root:Epoch[143] Batch[800] Loss[2.823] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[800] rmse=0.020420 lr=0.185727 [1,0]:INFO:root:Epoch[143] Batch[900] Loss[2.761] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[900] rmse=0.020424 lr=0.185538 [1,0]:INFO:root:Epoch[143] Batch[1000] Loss[3.934] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[1000] rmse=0.020427 lr=0.185350 [1,0]:INFO:root:Epoch[143] Batch[1100] Loss[2.908] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[1100] rmse=0.020430 lr=0.185161 [1,0]:INFO:root:Epoch[143] Batch[1200] Loss[2.965] [1,0]:INFO:root:Epoch[143] Rank[0] Batch[1200] rmse=0.020432 lr=0.184972 [1,0]:INFO:root:Epoch[143] Rank[0] Batch[1251] Time cost=399.64 Train-metric=0.020433 [1,0]:INFO:root:Epoch[143] Speed: 3205.42 samples/sec [1,0]:INFO:root:Epoch[144] Batch[100] Loss[2.627] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[100] rmse=0.020345 lr=0.184687 [1,0]:INFO:root:Epoch[144] Batch[200] Loss[3.447] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[200] rmse=0.020390 lr=0.184498 [1,0]:INFO:root:Epoch[144] Batch[300] Loss[2.709] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[300] rmse=0.020384 lr=0.184309 [1,0]:INFO:root:Epoch[144] Batch[400] Loss[2.705] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[400] rmse=0.020373 lr=0.184120 [1,0]:INFO:root:Epoch[144] Batch[500] Loss[2.979] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[500] rmse=0.020363 lr=0.183932 [1,0]:INFO:root:Epoch[144] Batch[600] Loss[2.689] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[600] rmse=0.020352 lr=0.183743 [1,0]:INFO:root:Epoch[144] Batch[700] Loss[3.904] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[700] rmse=0.020363 lr=0.183554 [1,0]:INFO:root:Epoch[144] Batch[800] Loss[3.006] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[800] rmse=0.020370 lr=0.183365 [1,0]:INFO:root:Epoch[144] Batch[900] Loss[3.342] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[900] rmse=0.020359 lr=0.183177 [1,0]:INFO:root:Epoch[144] Batch[1000] Loss[4.887] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[1000] rmse=0.020384 lr=0.182988 [1,0]:INFO:root:Epoch[144] Batch[1100] Loss[2.629] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[1100] rmse=0.020380 lr=0.182799 [1,0]:INFO:root:Epoch[144] Batch[1200] Loss[4.004] [1,0]:INFO:root:Epoch[144] Rank[0] Batch[1200] rmse=0.020351 lr=0.182611 [1,0]:INFO:root:Epoch[144] Rank[0] Batch[1251] Time cost=399.77 Train-metric=0.020359 [1,0]:INFO:root:Epoch[144] Speed: 3204.37 samples/sec [1,0]:INFO:root:Epoch[144] Rank[0] Validation-accuracy=0.647600 Validation-top_k_accuracy_5=0.863360 [1,0]:INFO:root:Epoch[145] Batch[100] Loss[4.492] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[100] rmse=0.020272 lr=0.182326 [1,0]:INFO:root:Epoch[145] Batch[200] Loss[5.061] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[200] rmse=0.020304 lr=0.182137 [1,0]:INFO:root:Epoch[145] Batch[300] Loss[2.753] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[300] rmse=0.020321 lr=0.181948 [1,0]:INFO:root:Epoch[145] Batch[400] Loss[3.133] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[400] rmse=0.020300 lr=0.181760 [1,0]:INFO:root:Epoch[145] Batch[500] Loss[2.806] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[500] rmse=0.020321 lr=0.181571 [1,0]:INFO:root:Epoch[145] Batch[600] Loss[2.612] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[600] rmse=0.020328 lr=0.181383 [1,0]:INFO:root:Epoch[145] Batch[700] Loss[2.838] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[700] rmse=0.020356 lr=0.181194 [1,0]:INFO:root:Epoch[145] Batch[800] Loss[3.753] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[800] rmse=0.020371 lr=0.181006 [1,0]:INFO:root:Epoch[145] Batch[900] Loss[2.823] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[900] rmse=0.020377 lr=0.180817 [1,0]:INFO:root:Epoch[145] Batch[1000] Loss[4.226] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[1000] rmse=0.020363 lr=0.180629 [1,0]:INFO:root:Epoch[145] Batch[1100] Loss[5.143] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[1100] rmse=0.020356 lr=0.180440 [1,0]:INFO:root:Epoch[145] Batch[1200] Loss[4.959] [1,0]:INFO:root:Epoch[145] Rank[0] Batch[1200] rmse=0.020355 lr=0.180252 [1,0]:INFO:root:Epoch[145] Rank[0] Batch[1251] Time cost=398.69 Train-metric=0.020353 [1,0]:INFO:root:Epoch[145] Speed: 3213.07 samples/sec [1,0]:INFO:root:Epoch[146] Batch[100] Loss[2.444] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[100] rmse=0.020277 lr=0.179967 [1,0]:INFO:root:Epoch[146] Batch[200] Loss[3.617] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[200] rmse=0.020218 lr=0.179779 [1,0]:INFO:root:Epoch[146] Batch[300] Loss[2.994] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[300] rmse=0.020266 lr=0.179590 [1,0]:INFO:root:Epoch[146] Batch[400] Loss[2.611] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[400] rmse=0.020281 lr=0.179402 [1,0]:INFO:root:Epoch[146] Batch[500] Loss[3.146] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[500] rmse=0.020354 lr=0.179214 [1,0]:INFO:root:Epoch[146] Batch[600] Loss[4.415] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[600] rmse=0.020317 lr=0.179025 [1,0]:INFO:root:Epoch[146] Batch[700] Loss[2.584] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[700] rmse=0.020315 lr=0.178837 [1,0]:INFO:root:Epoch[146] Batch[800] Loss[2.799] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[800] rmse=0.020303 lr=0.178649 [1,0]:INFO:root:Epoch[146] Batch[900] Loss[2.663] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[900] rmse=0.020329 lr=0.178460 [1,0]:INFO:root:Epoch[146] Batch[1000] Loss[2.642] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[1000] rmse=0.020328 lr=0.178272 [1,0]:INFO:root:Epoch[146] Batch[1100] Loss[2.790] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[1100] rmse=0.020340 lr=0.178084 [1,0]:INFO:root:Epoch[146] Batch[1200] Loss[2.553] [1,0]:INFO:root:Epoch[146] Rank[0] Batch[1200] rmse=0.020353 lr=0.177896 [1,0]:INFO:root:Epoch[146] Rank[0] Batch[1251] Time cost=398.84 Train-metric=0.020360 [1,0]:INFO:root:Epoch[146] Speed: 3211.91 samples/sec [1,0]:INFO:root:Epoch[147] Batch[100] Loss[2.677] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[100] rmse=0.020142 lr=0.177611 [1,0]:INFO:root:Epoch[147] Batch[200] Loss[4.892] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[200] rmse=0.020156 lr=0.177423 [1,0]:INFO:root:Epoch[147] Batch[300] Loss[5.189] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[300] rmse=0.020248 lr=0.177235 [1,0]:INFO:root:Epoch[147] Batch[400] Loss[2.791] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[400] rmse=0.020282 lr=0.177047 [1,0]:INFO:root:Epoch[147] Batch[500] Loss[2.705] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[500] rmse=0.020309 lr=0.176859 [1,0]:INFO:root:Epoch[147] Batch[600] Loss[3.816] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[600] rmse=0.020296 lr=0.176671 [1,0]:INFO:root:Epoch[147] Batch[700] Loss[2.620] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[700] rmse=0.020310 lr=0.176483 [1,0]:INFO:root:Epoch[147] Batch[800] Loss[3.965] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[800] rmse=0.020304 lr=0.176295 [1,0]:INFO:root:Epoch[147] Batch[900] Loss[4.286] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[900] rmse=0.020294 lr=0.176107 [1,0]:INFO:root:Epoch[147] Batch[1000] Loss[3.728] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[1000] rmse=0.020293 lr=0.175919 [1,0]:INFO:root:Epoch[147] Batch[1100] Loss[3.108] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[1100] rmse=0.020298 lr=0.175731 [1,0]:INFO:root:Epoch[147] Batch[1200] Loss[3.138] [1,0]:INFO:root:Epoch[147] Rank[0] Batch[1200] rmse=0.020309 lr=0.175543 [1,0]:INFO:root:Epoch[147] Rank[0] Batch[1251] Time cost=404.35 Train-metric=0.020319 [1,0]:INFO:root:Epoch[147] Speed: 3168.12 samples/sec [1,0]:INFO:root:Epoch[148] Batch[100] Loss[4.380] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[100] rmse=0.020142 lr=0.175259 [1,0]:INFO:root:Epoch[148] Batch[200] Loss[3.245] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[200] rmse=0.020235 lr=0.175071 [1,0]:INFO:root:Epoch[148] Batch[300] Loss[2.649] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[300] rmse=0.020277 lr=0.174883 [1,0]:INFO:root:Epoch[148] Batch[400] Loss[2.739] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[400] rmse=0.020258 lr=0.174695 [1,0]:INFO:root:Epoch[148] Batch[500] Loss[2.924] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[500] rmse=0.020248 lr=0.174507 [1,0]:INFO:root:Epoch[148] Batch[600] Loss[2.567] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[600] rmse=0.020286 lr=0.174319 [1,0]:INFO:root:Epoch[148] Batch[700] Loss[2.915] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[700] rmse=0.020305 lr=0.174132 [1,0]:INFO:root:Epoch[148] Batch[800] Loss[2.830] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[800] rmse=0.020298 lr=0.173944 [1,0]:INFO:root:Epoch[148] Batch[900] Loss[2.903] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[900] rmse=0.020299 lr=0.173756 [1,0]:INFO:root:Epoch[148] Batch[1000] Loss[5.075] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[1000] rmse=0.020307 lr=0.173568 [1,0]:INFO:root:Epoch[148] Batch[1100] Loss[3.012] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[1100] rmse=0.020325 lr=0.173381 [1,0]:INFO:root:Epoch[148] Batch[1200] Loss[4.857] [1,0]:INFO:root:Epoch[148] Rank[0] Batch[1200] rmse=0.020332 lr=0.173193 [1,0]:INFO:root:Epoch[148] Rank[0] Batch[1251] Time cost=401.04 Train-metric=0.020343 [1,0]:INFO:root:Epoch[148] Speed: 3194.26 samples/sec [1,0]:INFO:root:Epoch[149] Batch[100] Loss[2.498] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[100] rmse=0.020209 lr=0.172910 [1,0]:INFO:root:Epoch[149] Batch[200] Loss[4.700] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[200] rmse=0.020240 lr=0.172722 [1,0]:INFO:root:Epoch[149] Batch[300] Loss[3.654] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[300] rmse=0.020230 lr=0.172534 [1,0]:INFO:root:Epoch[149] Batch[400] Loss[2.784] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[400] rmse=0.020211 lr=0.172347 [1,0]:INFO:root:Epoch[149] Batch[500] Loss[5.228] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[500] rmse=0.020222 lr=0.172159 [1,0]:INFO:root:Epoch[149] Batch[600] Loss[2.619] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[600] rmse=0.020225 lr=0.171972 [1,0]:INFO:root:Epoch[149] Batch[700] Loss[2.656] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[700] rmse=0.020249 lr=0.171784 [1,0]:INFO:root:Epoch[149] Batch[800] Loss[4.053] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[800] rmse=0.020284 lr=0.171597 [1,0]:INFO:root:Epoch[149] Batch[900] Loss[4.806] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[900] rmse=0.020303 lr=0.171409 [1,0]:INFO:root:Epoch[149] Batch[1000] Loss[3.109] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[1000] rmse=0.020323 lr=0.171222 [1,0]:INFO:root:Epoch[149] Batch[1100] Loss[4.427] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[1100] rmse=0.020337 lr=0.171035 [1,0]:INFO:root:Epoch[149] Batch[1200] Loss[3.086] [1,0]:INFO:root:Epoch[149] Rank[0] Batch[1200] rmse=0.020332 lr=0.170847 [1,0]:INFO:root:Epoch[149] Rank[0] Batch[1251] Time cost=402.03 Train-metric=0.020327 [1,0]:INFO:root:Epoch[149] Speed: 3186.40 samples/sec [1,0]:INFO:root:Epoch[149] Rank[0] Validation-accuracy=0.648860 Validation-top_k_accuracy_5=0.866020 [1,0]:INFO:root:Epoch[150] Batch[100] Loss[3.380] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[100] rmse=0.019989 lr=0.170564 [1,0]:INFO:root:Epoch[150] Batch[200] Loss[2.980] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[200] rmse=0.020178 lr=0.170377 [1,0]:INFO:root:Epoch[150] Batch[300] Loss[4.684] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[300] rmse=0.020166 lr=0.170190 [1,0]:INFO:root:Epoch[150] Batch[400] Loss[2.772] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[400] rmse=0.020202 lr=0.170003 [1,0]:INFO:root:Epoch[150] Batch[500] Loss[3.681] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[500] rmse=0.020268 lr=0.169815 [1,0]:INFO:root:Epoch[150] Batch[600] Loss[4.110] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[600] rmse=0.020271 lr=0.169628 [1,0]:INFO:root:Epoch[150] Batch[700] Loss[2.985] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[700] rmse=0.020273 lr=0.169441 [1,0]:INFO:root:Epoch[150] Batch[800] Loss[3.607] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[800] rmse=0.020302 lr=0.169254 [1,0]:INFO:root:Epoch[150] Batch[900] Loss[5.175] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[900] rmse=0.020315 lr=0.169067 [1,0]:INFO:root:Epoch[150] Batch[1000] Loss[2.833] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[1000] rmse=0.020312 lr=0.168880 [1,0]:INFO:root:Epoch[150] Batch[1100] Loss[3.437] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[1100] rmse=0.020318 lr=0.168693 [1,0]:INFO:root:Epoch[150] Batch[1200] Loss[2.461] [1,0]:INFO:root:Epoch[150] Rank[0] Batch[1200] rmse=0.020323 lr=0.168506 [1,0]:INFO:root:Epoch[150] Rank[0] Batch[1251] Time cost=401.40 Train-metric=0.020331 [1,0]:INFO:root:Epoch[150] Speed: 3191.40 samples/sec [1,0]:INFO:root:Epoch[151] Batch[100] Loss[4.970] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[100] rmse=0.020214 lr=0.168223 [1,0]:INFO:root:Epoch[151] Batch[200] Loss[4.643] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[200] rmse=0.020201 lr=0.168036 [1,0]:INFO:root:Epoch[151] Batch[300] Loss[5.151] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[300] rmse=0.020212 lr=0.167849 [1,0]:INFO:root:Epoch[151] Batch[400] Loss[3.576] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[400] rmse=0.020269 lr=0.167662 [1,0]:INFO:root:Epoch[151] Batch[500] Loss[2.898] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[500] rmse=0.020284 lr=0.167475 [1,0]:INFO:root:Epoch[151] Batch[600] Loss[2.742] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[600] rmse=0.020285 lr=0.167289 [1,0]:INFO:root:Epoch[151] Batch[700] Loss[5.054] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[700] rmse=0.020308 lr=0.167102 [1,0]:INFO:root:Epoch[151] Batch[800] Loss[2.883] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[800] rmse=0.020311 lr=0.166915 [1,0]:INFO:root:Epoch[151] Batch[900] Loss[4.283] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[900] rmse=0.020324 lr=0.166728 [1,0]:INFO:root:Epoch[151] Batch[1000] Loss[4.464] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[1000] rmse=0.020329 lr=0.166542 [1,0]:INFO:root:Epoch[151] Batch[1100] Loss[2.727] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[1100] rmse=0.020341 lr=0.166355 [1,0]:INFO:root:Epoch[151] Batch[1200] Loss[2.607] [1,0]:INFO:root:Epoch[151] Rank[0] Batch[1200] rmse=0.020330 lr=0.166168 [1,0]:INFO:root:Epoch[151] Rank[0] Batch[1251] Time cost=399.24 Train-metric=0.020335 [1,0]:INFO:root:Epoch[151] Speed: 3208.66 samples/sec [1,0]:INFO:root:Epoch[152] Batch[100] Loss[4.715] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[100] rmse=0.020245 lr=0.165886 [1,0]:INFO:root:Epoch[152] Batch[200] Loss[3.175] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[200] rmse=0.020204 lr=0.165700 [1,0]:INFO:root:Epoch[152] Batch[300] Loss[2.904] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[300] rmse=0.020204 lr=0.165513 [1,0]:INFO:root:Epoch[152] Batch[400] Loss[2.510] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[400] rmse=0.020231 lr=0.165327 [1,0]:INFO:root:Epoch[152] Batch[500] Loss[3.408] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[500] rmse=0.020241 lr=0.165140 [1,0]:INFO:root:Epoch[152] Batch[600] Loss[4.702] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[600] rmse=0.020215 lr=0.164954 [1,0]:INFO:root:Epoch[152] Batch[700] Loss[2.784] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[700] rmse=0.020230 lr=0.164767 [1,0]:INFO:root:Epoch[152] Batch[800] Loss[2.555] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[800] rmse=0.020250 lr=0.164581 [1,0]:INFO:root:Epoch[152] Batch[900] Loss[3.142] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[900] rmse=0.020240 lr=0.164395 [1,0]:INFO:root:Epoch[152] Batch[1000] Loss[2.596] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[1000] rmse=0.020240 lr=0.164208 [1,0]:INFO:root:Epoch[152] Batch[1100] Loss[3.213] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[1100] rmse=0.020240 lr=0.164022 [1,0]:INFO:root:Epoch[152] Batch[1200] Loss[2.945] [1,0]:INFO:root:Epoch[152] Rank[0] Batch[1200] rmse=0.020240 lr=0.163836 [1,0]:INFO:root:Epoch[152] Rank[0] Batch[1251] Time cost=398.98 Train-metric=0.020236 [1,0]:INFO:root:Epoch[152] Speed: 3210.73 samples/sec [1,0]:INFO:root:Epoch[153] Batch[100] Loss[4.846] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[100] rmse=0.020242 lr=0.163554 [1,0]:INFO:root:Epoch[153] Batch[200] Loss[2.611] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[200] rmse=0.020179 lr=0.163368 [1,0]:INFO:root:Epoch[153] Batch[300] Loss[3.374] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[300] rmse=0.020206 lr=0.163182 [1,0]:INFO:root:Epoch[153] Batch[400] Loss[3.784] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[400] rmse=0.020180 lr=0.162996 [1,0]:INFO:root:Epoch[153] Batch[500] Loss[5.000] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[500] rmse=0.020181 lr=0.162810 [1,0]:INFO:root:Epoch[153] Batch[600] Loss[3.536] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[600] rmse=0.020179 lr=0.162624 [1,0]:INFO:root:Epoch[153] Batch[700] Loss[3.961] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[700] rmse=0.020173 lr=0.162438 [1,0]:INFO:root:Epoch[153] Batch[800] Loss[2.787] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[800] rmse=0.020191 lr=0.162252 [1,0]:INFO:root:Epoch[153] Batch[900] Loss[2.754] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[900] rmse=0.020214 lr=0.162066 [1,0]:INFO:root:Epoch[153] Batch[1000] Loss[5.287] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[1000] rmse=0.020234 lr=0.161880 [1,0]:INFO:root:Epoch[153] Batch[1100] Loss[4.343] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[1100] rmse=0.020245 lr=0.161694 [1,0]:INFO:root:Epoch[153] Batch[1200] Loss[4.877] [1,0]:INFO:root:Epoch[153] Rank[0] Batch[1200] rmse=0.020252 lr=0.161508 [1,0]:INFO:root:Epoch[153] Rank[0] Batch[1251] Time cost=399.32 Train-metric=0.020254 [1,0]:INFO:root:Epoch[153] Speed: 3208.00 samples/sec [1,0]:INFO:root:Epoch[154] Batch[100] Loss[3.421] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[100] rmse=0.020001 lr=0.161228 [1,0]:INFO:root:Epoch[154] Batch[200] Loss[2.706] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[200] rmse=0.020203 lr=0.161042 [1,0]:INFO:root:Epoch[154] Batch[300] Loss[2.606] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[300] rmse=0.020189 lr=0.160856 [1,0]:INFO:root:Epoch[154] Batch[400] Loss[5.021] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[400] rmse=0.020212 lr=0.160670 [1,0]:INFO:root:Epoch[154] Batch[500] Loss[4.257] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[500] rmse=0.020177 lr=0.160485 [1,0]:INFO:root:Epoch[154] Batch[600] Loss[4.493] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[600] rmse=0.020207 lr=0.160299 [1,0]:INFO:root:Epoch[154] Batch[700] Loss[2.588] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[700] rmse=0.020204 lr=0.160114 [1,0]:INFO:root:Epoch[154] Batch[800] Loss[2.770] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[800] rmse=0.020205 lr=0.159928 [1,0]:INFO:root:Epoch[154] Batch[900] Loss[4.775] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[900] rmse=0.020226 lr=0.159742 [1,0]:INFO:root:Epoch[154] Batch[1000] Loss[2.707] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[1000] rmse=0.020230 lr=0.159557 [1,0]:INFO:root:Epoch[154] Batch[1100] Loss[4.969] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[1100] rmse=0.020265 lr=0.159372 [1,0]:INFO:root:Epoch[154] Batch[1200] Loss[2.751] [1,0]:INFO:root:Epoch[154] Rank[0] Batch[1200] rmse=0.020283 lr=0.159186 [1,0]:INFO:root:Epoch[154] Rank[0] Batch[1251] Time cost=399.33 Train-metric=0.020274 [1,0]:INFO:root:Epoch[154] Speed: 3207.91 samples/sec [1,0]:INFO:root:Epoch[154] Rank[0] Validation-accuracy=0.647400 Validation-top_k_accuracy_5=0.865260 [1,0]:INFO:root:Epoch[155] Batch[100] Loss[2.531] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[100] rmse=0.020182 lr=0.158906 [1,0]:INFO:root:Epoch[155] Batch[200] Loss[2.765] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[200] rmse=0.020178 lr=0.158721 [1,0]:INFO:root:Epoch[155] Batch[300] Loss[4.566] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[300] rmse=0.020177 lr=0.158536 [1,0]:INFO:root:Epoch[155] Batch[400] Loss[2.799] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[400] rmse=0.020167 lr=0.158350 [1,0]:INFO:root:Epoch[155] Batch[500] Loss[4.911] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[500] rmse=0.020195 lr=0.158165 [1,0]:INFO:root:Epoch[155] Batch[600] Loss[3.744] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[600] rmse=0.020146 lr=0.157980 [1,0]:INFO:root:Epoch[155] Batch[700] Loss[4.707] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[700] rmse=0.020162 lr=0.157795 [1,0]:INFO:root:Epoch[155] Batch[800] Loss[4.942] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[800] rmse=0.020188 lr=0.157610 [1,0]:INFO:root:Epoch[155] Batch[900] Loss[2.615] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[900] rmse=0.020182 lr=0.157425 [1,0]:INFO:root:Epoch[155] Batch[1000] Loss[2.935] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[1000] rmse=0.020201 lr=0.157240 [1,0]:INFO:root:Epoch[155] Batch[1100] Loss[2.857] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[1100] rmse=0.020217 lr=0.157055 [1,0]:INFO:root:Epoch[155] Batch[1200] Loss[4.613] [1,0]:INFO:root:Epoch[155] Rank[0] Batch[1200] rmse=0.020218 lr=0.156870 [1,0]:INFO:root:Epoch[155] Rank[0] Batch[1251] Time cost=398.96 Train-metric=0.020229 [1,0]:INFO:root:Epoch[155] Speed: 3210.94 samples/sec [1,0]:INFO:root:Epoch[156] Batch[100] Loss[2.830] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[100] rmse=0.020041 lr=0.156591 [1,0]:INFO:root:Epoch[156] Batch[200] Loss[2.871] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[200] rmse=0.020121 lr=0.156406 [1,0]:INFO:root:Epoch[156] Batch[300] Loss[2.748] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[300] rmse=0.020118 lr=0.156221 [1,0]:INFO:root:Epoch[156] Batch[400] Loss[2.543] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[400] rmse=0.020134 lr=0.156036 [1,0]:INFO:root:Epoch[156] Batch[500] Loss[3.118] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[500] rmse=0.020131 lr=0.155851 [1,0]:INFO:root:Epoch[156] Batch[600] Loss[4.967] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[600] rmse=0.020155 lr=0.155667 [1,0]:INFO:root:Epoch[156] Batch[700] Loss[2.584] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[700] rmse=0.020166 lr=0.155482 [1,0]:INFO:root:Epoch[156] Batch[800] Loss[2.789] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[800] rmse=0.020146 lr=0.155297 [1,0]:INFO:root:Epoch[156] Batch[900] Loss[3.797] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[900] rmse=0.020151 lr=0.155113 [1,0]:INFO:root:Epoch[156] Batch[1000] Loss[2.791] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[1000] rmse=0.020164 lr=0.154928 [1,0]:INFO:root:Epoch[156] Batch[1100] Loss[3.248] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[1100] rmse=0.020176 lr=0.154744 [1,0]:INFO:root:Epoch[156] Batch[1200] Loss[3.028] [1,0]:INFO:root:Epoch[156] Rank[0] Batch[1200] rmse=0.020178 lr=0.154559 [1,0]:INFO:root:Epoch[156] Rank[0] Batch[1251] Time cost=399.17 Train-metric=0.020187 [1,0]:INFO:root:Epoch[156] Speed: 3209.22 samples/sec [1,0]:INFO:root:Epoch[157] Batch[100] Loss[2.764] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[100] rmse=0.020223 lr=0.154281 [1,0]:INFO:root:Epoch[157] Batch[200] Loss[3.041] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[200] rmse=0.020185 lr=0.154097 [1,0]:INFO:root:Epoch[157] Batch[300] Loss[2.458] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[300] rmse=0.020159 lr=0.153912 [1,0]:INFO:root:Epoch[157] Batch[400] Loss[3.169] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[400] rmse=0.020168 lr=0.153728 [1,0]:INFO:root:Epoch[157] Batch[500] Loss[4.152] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[500] rmse=0.020184 lr=0.153544 [1,0]:INFO:root:Epoch[157] Batch[600] Loss[2.871] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[600] rmse=0.020166 lr=0.153360 [1,0]:INFO:root:Epoch[157] Batch[700] Loss[2.798] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[700] rmse=0.020160 lr=0.153176 [1,0]:INFO:root:Epoch[157] Batch[800] Loss[2.650] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[800] rmse=0.020161 lr=0.152991 [1,0]:INFO:root:Epoch[157] Batch[900] Loss[2.466] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[900] rmse=0.020169 lr=0.152807 [1,0]:INFO:root:Epoch[157] Batch[1000] Loss[2.941] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[1000] rmse=0.020183 lr=0.152623 [1,0]:INFO:root:Epoch[157] Batch[1100] Loss[2.752] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[1100] rmse=0.020177 lr=0.152439 [1,0]:INFO:root:Epoch[157] Batch[1200] Loss[2.735] [1,0]:INFO:root:Epoch[157] Rank[0] Batch[1200] rmse=0.020174 lr=0.152256 [1,0]:INFO:root:Epoch[157] Rank[0] Batch[1251] Time cost=400.26 Train-metric=0.020179 [1,0]:INFO:root:Epoch[157] Speed: 3200.52 samples/sec [1,0]:INFO:root:Epoch[158] Batch[100] Loss[4.900] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[100] rmse=0.020045 lr=0.151978 [1,0]:INFO:root:Epoch[158] Batch[200] Loss[4.489] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[200] rmse=0.020108 lr=0.151794 [1,0]:INFO:root:Epoch[158] Batch[300] Loss[2.704] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[300] rmse=0.020148 lr=0.151610 [1,0]:INFO:root:Epoch[158] Batch[400] Loss[3.159] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[400] rmse=0.020101 lr=0.151427 [1,0]:INFO:root:Epoch[158] Batch[500] Loss[4.598] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[500] rmse=0.020079 lr=0.151243 [1,0]:INFO:root:Epoch[158] Batch[600] Loss[2.579] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[600] rmse=0.020089 lr=0.151059 [1,0]:INFO:root:Epoch[158] Batch[700] Loss[2.494] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[700] rmse=0.020096 lr=0.150876 [1,0]:INFO:root:Epoch[158] Batch[800] Loss[3.339] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[800] rmse=0.020093 lr=0.150692 [1,0]:INFO:root:Epoch[158] Batch[900] Loss[2.812] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[900] rmse=0.020103 lr=0.150509 [1,0]:INFO:root:Epoch[158] Batch[1000] Loss[2.648] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[1000] rmse=0.020102 lr=0.150325 [1,0]:INFO:root:Epoch[158] Batch[1100] Loss[2.586] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[1100] rmse=0.020120 lr=0.150142 [1,0]:INFO:root:Epoch[158] Batch[1200] Loss[2.886] [1,0]:INFO:root:Epoch[158] Rank[0] Batch[1200] rmse=0.020133 lr=0.149958 [1,0]:INFO:root:Epoch[158] Rank[0] Batch[1251] Time cost=400.20 Train-metric=0.020140 [1,0]:INFO:root:Epoch[158] Speed: 3200.92 samples/sec [1,0]:INFO:root:Epoch[159] Batch[100] Loss[3.097] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[100] rmse=0.020010 lr=0.149682 [1,0]:INFO:root:Epoch[159] Batch[200] Loss[2.519] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[200] rmse=0.020051 lr=0.149498 [1,0]:INFO:root:Epoch[159] Batch[300] Loss[2.713] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[300] rmse=0.020108 lr=0.149315 [1,0]:INFO:root:Epoch[159] Batch[400] Loss[3.084] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[400] rmse=0.020080 lr=0.149132 [1,0]:INFO:root:Epoch[159] Batch[500] Loss[2.517] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[500] rmse=0.020106 lr=0.148949 [1,0]:INFO:root:Epoch[159] Batch[600] Loss[2.895] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[600] rmse=0.020095 lr=0.148766 [1,0]:INFO:root:Epoch[159] Batch[700] Loss[4.423] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[700] rmse=0.020100 lr=0.148583 [1,0]:INFO:root:Epoch[159] Batch[800] Loss[3.050] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[800] rmse=0.020108 lr=0.148400 [1,0]:INFO:root:Epoch[159] Batch[900] Loss[2.825] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[900] rmse=0.020124 lr=0.148217 [1,0]:INFO:root:Epoch[159] Batch[1000] Loss[4.368] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[1000] rmse=0.020116 lr=0.148034 [1,0]:INFO:root:Epoch[159] Batch[1100] Loss[2.845] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[1100] rmse=0.020123 lr=0.147851 [1,0]:INFO:root:Epoch[159] Batch[1200] Loss[2.756] [1,0]:INFO:root:Epoch[159] Rank[0] Batch[1200] rmse=0.020126 lr=0.147668 [1,0]:INFO:root:Epoch[159] Rank[0] Batch[1251] Time cost=398.98 Train-metric=0.020131 [1,0]:INFO:root:Epoch[159] Speed: 3210.72 samples/sec [1,0]:INFO:root:Epoch[159] Rank[0] Validation-accuracy=0.653660 Validation-top_k_accuracy_5=0.870760 [1,0]:INFO:root:Epoch[160] Batch[100] Loss[5.206] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[100] rmse=0.020133 lr=0.147392 [1,0]:INFO:root:Epoch[160] Batch[200] Loss[2.722] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[200] rmse=0.020021 lr=0.147209 [1,0]:INFO:root:Epoch[160] Batch[300] Loss[2.637] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[300] rmse=0.020028 lr=0.147027 [1,0]:INFO:root:Epoch[160] Batch[400] Loss[2.926] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[400] rmse=0.020039 lr=0.146844 [1,0]:INFO:root:Epoch[160] Batch[500] Loss[2.895] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[500] rmse=0.020050 lr=0.146662 [1,0]:INFO:root:Epoch[160] Batch[600] Loss[4.385] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[600] rmse=0.020053 lr=0.146479 [1,0]:INFO:root:Epoch[160] Batch[700] Loss[5.230] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[700] rmse=0.020078 lr=0.146297 [1,0]:INFO:root:Epoch[160] Batch[800] Loss[2.920] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[800] rmse=0.020109 lr=0.146114 [1,0]:INFO:root:Epoch[160] Batch[900] Loss[2.813] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[900] rmse=0.020128 lr=0.145932 [1,0]:INFO:root:Epoch[160] Batch[1000] Loss[2.538] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[1000] rmse=0.020160 lr=0.145750 [1,0]:INFO:root:Epoch[160] Batch[1100] Loss[3.816] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[1100] rmse=0.020159 lr=0.145567 [1,0]:INFO:root:Epoch[160] Batch[1200] Loss[3.140] [1,0]:INFO:root:Epoch[160] Rank[0] Batch[1200] rmse=0.020171 lr=0.145385 [1,0]:INFO:root:Epoch[160] Rank[0] Batch[1251] Time cost=397.75 Train-metric=0.020178 [1,0]:INFO:root:Epoch[160] Speed: 3220.65 samples/sec [1,0]:INFO:root:Epoch[161] Batch[100] Loss[2.658] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[100] rmse=0.020097 lr=0.145110 [1,0]:INFO:root:Epoch[161] Batch[200] Loss[2.916] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[200] rmse=0.020108 lr=0.144928 [1,0]:INFO:root:Epoch[161] Batch[300] Loss[4.910] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[300] rmse=0.020080 lr=0.144746 [1,0]:INFO:root:Epoch[161] Batch[400] Loss[3.146] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[400] rmse=0.020079 lr=0.144564 [1,0]:INFO:root:Epoch[161] Batch[500] Loss[4.639] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[500] rmse=0.020101 lr=0.144382 [1,0]:INFO:root:Epoch[161] Batch[600] Loss[3.110] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[600] rmse=0.020111 lr=0.144200 [1,0]:INFO:root:Epoch[161] Batch[700] Loss[4.997] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[700] rmse=0.020096 lr=0.144018 [1,0]:INFO:root:Epoch[161] Batch[800] Loss[2.776] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[800] rmse=0.020076 lr=0.143837 [1,0]:INFO:root:Epoch[161] Batch[900] Loss[2.884] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[900] rmse=0.020096 lr=0.143655 [1,0]:INFO:root:Epoch[161] Batch[1000] Loss[2.809] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[1000] rmse=0.020105 lr=0.143473 [1,0]:INFO:root:Epoch[161] Batch[1100] Loss[2.484] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[1100] rmse=0.020108 lr=0.143292 [1,0]:INFO:root:Epoch[161] Batch[1200] Loss[5.201] [1,0]:INFO:root:Epoch[161] Rank[0] Batch[1200] rmse=0.020116 lr=0.143110 [1,0]:INFO:root:Epoch[161] Rank[0] Batch[1251] Time cost=399.43 Train-metric=0.020115 [1,0]:INFO:root:Epoch[161] Speed: 3207.13 samples/sec [1,0]:INFO:root:Epoch[162] Batch[100] Loss[3.604] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[100] rmse=0.020027 lr=0.142836 [1,0]:INFO:root:Epoch[162] Batch[200] Loss[4.960] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[200] rmse=0.019963 lr=0.142655 [1,0]:INFO:root:Epoch[162] Batch[300] Loss[2.786] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[300] rmse=0.019980 lr=0.142473 [1,0]:INFO:root:Epoch[162] Batch[400] Loss[3.019] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[400] rmse=0.019966 lr=0.142292 [1,0]:INFO:root:Epoch[162] Batch[500] Loss[2.480] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[500] rmse=0.020010 lr=0.142110 [1,0]:INFO:root:Epoch[162] Batch[600] Loss[2.714] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[600] rmse=0.019992 lr=0.141929 [1,0]:INFO:root:Epoch[162] Batch[700] Loss[4.838] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[700] rmse=0.019961 lr=0.141748 [1,0]:INFO:root:Epoch[162] Batch[800] Loss[2.544] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[800] rmse=0.019962 lr=0.141567 [1,0]:INFO:root:Epoch[162] Batch[900] Loss[2.996] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[900] rmse=0.020004 lr=0.141386 [1,0]:INFO:root:Epoch[162] Batch[1000] Loss[3.081] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[1000] rmse=0.020010 lr=0.141205 [1,0]:INFO:root:Epoch[162] Batch[1100] Loss[2.542] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[1100] rmse=0.020021 lr=0.141024 [1,0]:INFO:root:Epoch[162] Batch[1200] Loss[2.802] [1,0]:INFO:root:Epoch[162] Rank[0] Batch[1200] rmse=0.020044 lr=0.140843 [1,0]:INFO:root:Epoch[162] Rank[0] Batch[1251] Time cost=398.89 Train-metric=0.020049 [1,0]:INFO:root:Epoch[162] Speed: 3211.51 samples/sec [1,0]:INFO:root:Epoch[163] Batch[100] Loss[2.805] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[100] rmse=0.019920 lr=0.140570 [1,0]:INFO:root:Epoch[163] Batch[200] Loss[2.693] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[200] rmse=0.019875 lr=0.140389 [1,0]:INFO:root:Epoch[163] Batch[300] Loss[2.679] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[300] rmse=0.019882 lr=0.140208 [1,0]:INFO:root:Epoch[163] Batch[400] Loss[2.899] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[400] rmse=0.019939 lr=0.140028 [1,0]:INFO:root:Epoch[163] Batch[500] Loss[3.141] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[500] rmse=0.019960 lr=0.139847 [1,0]:INFO:root:Epoch[163] Batch[600] Loss[4.366] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[600] rmse=0.019968 lr=0.139666 [1,0]:INFO:root:Epoch[163] Batch[700] Loss[2.704] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[700] rmse=0.019983 lr=0.139486 [1,0]:INFO:root:Epoch[163] Batch[800] Loss[2.648] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[800] rmse=0.020015 lr=0.139305 [1,0]:INFO:root:Epoch[163] Batch[900] Loss[4.365] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[900] rmse=0.020022 lr=0.139125 [1,0]:INFO:root:Epoch[163] Batch[1000] Loss[2.640] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[1000] rmse=0.020033 lr=0.138945 [1,0]:INFO:root:Epoch[163] Batch[1100] Loss[4.444] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[1100] rmse=0.020058 lr=0.138764 [1,0]:INFO:root:Epoch[163] Batch[1200] Loss[3.987] [1,0]:INFO:root:Epoch[163] Rank[0] Batch[1200] rmse=0.020069 lr=0.138584 [1,0]:INFO:root:Epoch[163] Rank[0] Batch[1251] Time cost=399.32 Train-metric=0.020068 [1,0]:INFO:root:Epoch[163] Speed: 3208.00 samples/sec [1,0]:INFO:root:Epoch[164] Batch[100] Loss[3.053] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[100] rmse=0.019857 lr=0.138312 [1,0]:INFO:root:Epoch[164] Batch[200] Loss[2.551] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[200] rmse=0.019955 lr=0.138132 [1,0]:INFO:root:Epoch[164] Batch[300] Loss[2.778] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[300] rmse=0.019992 lr=0.137952 [1,0]:INFO:root:Epoch[164] Batch[400] Loss[3.667] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[400] rmse=0.020021 lr=0.137772 [1,0]:INFO:root:Epoch[164] Batch[500] Loss[5.164] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[500] rmse=0.020007 lr=0.137592 [1,0]:INFO:root:Epoch[164] Batch[600] Loss[4.637] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[600] rmse=0.020040 lr=0.137412 [1,0]:INFO:root:Epoch[164] Batch[700] Loss[2.486] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[700] rmse=0.020052 lr=0.137232 [1,0]:INFO:root:Epoch[164] Batch[800] Loss[4.439] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[800] rmse=0.020049 lr=0.137052 [1,0]:INFO:root:Epoch[164] Batch[900] Loss[2.562] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[900] rmse=0.020044 lr=0.136873 [1,0]:INFO:root:Epoch[164] Batch[1000] Loss[3.832] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[1000] rmse=0.020043 lr=0.136693 [1,0]:INFO:root:Epoch[164] Batch[1100] Loss[2.520] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[1100] rmse=0.020043 lr=0.136513 [1,0]:INFO:root:Epoch[164] Batch[1200] Loss[2.528] [1,0]:INFO:root:Epoch[164] Rank[0] Batch[1200] rmse=0.020064 lr=0.136334 [1,0]:INFO:root:Epoch[164] Rank[0] Batch[1251] Time cost=399.21 Train-metric=0.020063 [1,0]:INFO:root:Epoch[164] Speed: 3208.91 samples/sec [1,0]:INFO:root:Epoch[164] Rank[0] Validation-accuracy=0.666320 Validation-top_k_accuracy_5=0.876800 [1,0]:INFO:root:Epoch[165] Batch[100] Loss[2.770] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[100] rmse=0.020084 lr=0.136063 [1,0]:INFO:root:Epoch[165] Batch[200] Loss[2.843] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[200] rmse=0.020120 lr=0.135883 [1,0]:INFO:root:Epoch[165] Batch[300] Loss[4.839] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[300] rmse=0.020024 lr=0.135704 [1,0]:INFO:root:Epoch[165] Batch[400] Loss[2.667] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[400] rmse=0.020020 lr=0.135525 [1,0]:INFO:root:Epoch[165] Batch[500] Loss[2.839] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[500] rmse=0.020038 lr=0.135345 [1,0]:INFO:root:Epoch[165] Batch[600] Loss[2.691] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[600] rmse=0.020049 lr=0.135166 [1,0]:INFO:root:Epoch[165] Batch[700] Loss[4.706] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[700] rmse=0.020083 lr=0.134987 [1,0]:INFO:root:Epoch[165] Batch[800] Loss[2.962] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[800] rmse=0.020082 lr=0.134808 [1,0]:INFO:root:Epoch[165] Batch[900] Loss[2.756] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[900] rmse=0.020063 lr=0.134629 [1,0]:INFO:root:Epoch[165] Batch[1000] Loss[2.771] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[1000] rmse=0.020064 lr=0.134450 [1,0]:INFO:root:Epoch[165] Batch[1100] Loss[3.368] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[1100] rmse=0.020059 lr=0.134271 [1,0]:INFO:root:Epoch[165] Batch[1200] Loss[3.129] [1,0]:INFO:root:Epoch[165] Rank[0] Batch[1200] rmse=0.020062 lr=0.134092 [1,0]:INFO:root:Epoch[165] Rank[0] Batch[1251] Time cost=398.86 Train-metric=0.020067 [1,0]:INFO:root:Epoch[165] Speed: 3211.72 samples/sec [1,0]:INFO:root:Epoch[166] Batch[100] Loss[2.910] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[100] rmse=0.019862 lr=0.133822 [1,0]:INFO:root:Epoch[166] Batch[200] Loss[2.422] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[200] rmse=0.019895 lr=0.133644 [1,0]:INFO:root:Epoch[166] Batch[300] Loss[3.838] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[300] rmse=0.019864 lr=0.133465 [1,0]:INFO:root:Epoch[166] Batch[400] Loss[2.832] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[400] rmse=0.019942 lr=0.133287 [1,0]:INFO:root:Epoch[166] Batch[500] Loss[2.518] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[500] rmse=0.019952 lr=0.133108 [1,0]:INFO:root:Epoch[166] Batch[600] Loss[2.388] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[600] rmse=0.019962 lr=0.132930 [1,0]:INFO:root:Epoch[166] Batch[700] Loss[2.677] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[700] rmse=0.019963 lr=0.132751 [1,0]:INFO:root:Epoch[166] Batch[800] Loss[2.972] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[800] rmse=0.019965 lr=0.132573 [1,0]:INFO:root:Epoch[166] Batch[900] Loss[4.061] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[900] rmse=0.019965 lr=0.132395 [1,0]:INFO:root:Epoch[166] Batch[1000] Loss[2.644] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[1000] rmse=0.019983 lr=0.132216 [1,0]:INFO:root:Epoch[166] Batch[1100] Loss[3.418] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[1100] rmse=0.019991 lr=0.132038 [1,0]:INFO:root:Epoch[166] Batch[1200] Loss[5.146] [1,0]:INFO:root:Epoch[166] Rank[0] Batch[1200] rmse=0.019994 lr=0.131860 [1,0]:INFO:root:Epoch[166] Rank[0] Batch[1251] Time cost=399.41 Train-metric=0.019988 [1,0]:INFO:root:Epoch[166] Speed: 3207.33 samples/sec [1,0]:INFO:root:Epoch[167] Batch[100] Loss[3.686] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[100] rmse=0.020065 lr=0.131591 [1,0]:INFO:root:Epoch[167] Batch[200] Loss[2.567] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[200] rmse=0.019956 lr=0.131413 [1,0]:INFO:root:Epoch[167] Batch[300] Loss[2.833] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[300] rmse=0.019973 lr=0.131236 [1,0]:INFO:root:Epoch[167] Batch[400] Loss[2.809] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[400] rmse=0.020004 lr=0.131058 [1,0]:INFO:root:Epoch[167] Batch[500] Loss[5.042] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[500] rmse=0.019969 lr=0.130880 [1,0]:INFO:root:Epoch[167] Batch[600] Loss[3.454] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[600] rmse=0.019959 lr=0.130702 [1,0]:INFO:root:Epoch[167] Batch[700] Loss[2.825] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[700] rmse=0.019956 lr=0.130525 [1,0]:INFO:root:Epoch[167] Batch[800] Loss[2.681] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[800] rmse=0.019982 lr=0.130347 [1,0]:INFO:root:Epoch[167] Batch[900] Loss[2.298] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[900] rmse=0.020002 lr=0.130170 [1,0]:INFO:root:Epoch[167] Batch[1000] Loss[5.095] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[1000] rmse=0.019993 lr=0.129992 [1,0]:INFO:root:Epoch[167] Batch[1100] Loss[2.619] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[1100] rmse=0.020003 lr=0.129815 [1,0]:INFO:root:Epoch[167] Batch[1200] Loss[4.088] [1,0]:INFO:root:Epoch[167] Rank[0] Batch[1200] rmse=0.020004 lr=0.129638 [1,0]:INFO:root:Epoch[167] Rank[0] Batch[1251] Time cost=398.70 Train-metric=0.020007 [1,0]:INFO:root:Epoch[167] Speed: 3212.97 samples/sec [1,0]:INFO:root:Epoch[168] Batch[100] Loss[3.974] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[100] rmse=0.019924 lr=0.129370 [1,0]:INFO:root:Epoch[168] Batch[200] Loss[2.297] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[200] rmse=0.019870 lr=0.129193 [1,0]:INFO:root:Epoch[168] Batch[300] Loss[2.515] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[300] rmse=0.019905 lr=0.129016 [1,0]:INFO:root:Epoch[168] Batch[400] Loss[5.248] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[400] rmse=0.019974 lr=0.128839 [1,0]:INFO:root:Epoch[168] Batch[500] Loss[2.791] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[500] rmse=0.019977 lr=0.128662 [1,0]:INFO:root:Epoch[168] Batch[600] Loss[2.570] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[600] rmse=0.019959 lr=0.128485 [1,0]:INFO:root:Epoch[168] Batch[700] Loss[3.134] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[700] rmse=0.019954 lr=0.128308 [1,0]:INFO:root:Epoch[168] Batch[800] Loss[5.162] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[800] rmse=0.019960 lr=0.128131 [1,0]:INFO:root:Epoch[168] Batch[900] Loss[2.654] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[900] rmse=0.019965 lr=0.127955 [1,0]:INFO:root:Epoch[168] Batch[1000] Loss[4.985] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[1000] rmse=0.019972 lr=0.127778 [1,0]:INFO:root:Epoch[168] Batch[1100] Loss[2.563] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[1100] rmse=0.019972 lr=0.127601 [1,0]:INFO:root:Epoch[168] Batch[1200] Loss[4.843] [1,0]:INFO:root:Epoch[168] Rank[0] Batch[1200] rmse=0.019969 lr=0.127425 [1,0]:INFO:root:Epoch[168] Rank[0] Batch[1251] Time cost=399.63 Train-metric=0.019973 [1,0]:INFO:root:Epoch[168] Speed: 3205.56 samples/sec [1,0]:INFO:root:Epoch[169] Batch[100] Loss[2.376] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[100] rmse=0.020012 lr=0.127159 [1,0]:INFO:root:Epoch[169] Batch[200] Loss[2.583] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[200] rmse=0.019950 lr=0.126982 [1,0]:INFO:root:Epoch[169] Batch[300] Loss[2.825] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[300] rmse=0.019918 lr=0.126806 [1,0]:INFO:root:Epoch[169] Batch[400] Loss[3.515] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[400] rmse=0.019911 lr=0.126630 [1,0]:INFO:root:Epoch[169] Batch[500] Loss[2.689] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[500] rmse=0.019917 lr=0.126454 [1,0]:INFO:root:Epoch[169] Batch[600] Loss[2.712] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[600] rmse=0.019946 lr=0.126278 [1,0]:INFO:root:Epoch[169] Batch[700] Loss[3.310] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[700] rmse=0.019940 lr=0.126102 [1,0]:INFO:root:Epoch[169] Batch[800] Loss[2.787] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[800] rmse=0.019958 lr=0.125926 [1,0]:INFO:root:Epoch[169] Batch[900] Loss[2.677] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[900] rmse=0.019963 lr=0.125750 [1,0]:INFO:root:Epoch[169] Batch[1000] Loss[3.244] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[1000] rmse=0.019966 lr=0.125574 [1,0]:INFO:root:Epoch[169] Batch[1100] Loss[4.889] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[1100] rmse=0.019950 lr=0.125398 [1,0]:INFO:root:Epoch[169] Batch[1200] Loss[4.823] [1,0]:INFO:root:Epoch[169] Rank[0] Batch[1200] rmse=0.019947 lr=0.125222 [1,0]:INFO:root:Epoch[169] Rank[0] Batch[1251] Time cost=398.96 Train-metric=0.019950 [1,0]:INFO:root:Epoch[169] Speed: 3210.90 samples/sec [1,0]:INFO:root:Epoch[169] Rank[0] Validation-accuracy=0.663560 Validation-top_k_accuracy_5=0.874680 [1,0]:INFO:root:Epoch[170] Batch[100] Loss[2.016] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[100] rmse=0.019700 lr=0.124957 [1,0]:INFO:root:Epoch[170] Batch[200] Loss[4.366] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[200] rmse=0.019801 lr=0.124782 [1,0]:INFO:root:Epoch[170] Batch[300] Loss[2.712] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[300] rmse=0.019803 lr=0.124606 [1,0]:INFO:root:Epoch[170] Batch[400] Loss[4.411] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[400] rmse=0.019838 lr=0.124431 [1,0]:INFO:root:Epoch[170] Batch[500] Loss[2.532] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[500] rmse=0.019825 lr=0.124256 [1,0]:INFO:root:Epoch[170] Batch[600] Loss[2.801] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[600] rmse=0.019848 lr=0.124080 [1,0]:INFO:root:Epoch[170] Batch[700] Loss[2.522] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[700] rmse=0.019885 lr=0.123905 [1,0]:INFO:root:Epoch[170] Batch[800] Loss[4.866] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[800] rmse=0.019915 lr=0.123730 [1,0]:INFO:root:Epoch[170] Batch[900] Loss[3.748] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[900] rmse=0.019924 lr=0.123555 [1,0]:INFO:root:Epoch[170] Batch[1000] Loss[2.645] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[1000] rmse=0.019927 lr=0.123380 [1,0]:INFO:root:Epoch[170] Batch[1100] Loss[2.645] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[1100] rmse=0.019929 lr=0.123205 [1,0]:INFO:root:Epoch[170] Batch[1200] Loss[2.955] [1,0]:INFO:root:Epoch[170] Rank[0] Batch[1200] rmse=0.019932 lr=0.123030 [1,0]:INFO:root:Epoch[170] Rank[0] Batch[1251] Time cost=399.12 Train-metric=0.019928 [1,0]:INFO:root:Epoch[170] Speed: 3209.62 samples/sec [1,0]:INFO:root:Epoch[171] Batch[100] Loss[2.837] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[100] rmse=0.019879 lr=0.122767 [1,0]:INFO:root:Epoch[171] Batch[200] Loss[2.600] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[200] rmse=0.019838 lr=0.122592 [1,0]:INFO:root:Epoch[171] Batch[300] Loss[2.604] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[300] rmse=0.019843 lr=0.122417 [1,0]:INFO:root:Epoch[171] Batch[400] Loss[4.837] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[400] rmse=0.019857 lr=0.122243 [1,0]:INFO:root:Epoch[171] Batch[500] Loss[3.275] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[500] rmse=0.019883 lr=0.122068 [1,0]:INFO:root:Epoch[171] Batch[600] Loss[2.741] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[600] rmse=0.019894 lr=0.121894 [1,0]:INFO:root:Epoch[171] Batch[700] Loss[2.454] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[700] rmse=0.019922 lr=0.121720 [1,0]:INFO:root:Epoch[171] Batch[800] Loss[3.448] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[800] rmse=0.019933 lr=0.121546 [1,0]:INFO:root:Epoch[171] Batch[900] Loss[2.919] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[900] rmse=0.019914 lr=0.121371 [1,0]:INFO:root:Epoch[171] Batch[1000] Loss[3.825] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[1000] rmse=0.019929 lr=0.121197 [1,0]:INFO:root:Epoch[171] Batch[1100] Loss[4.823] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[1100] rmse=0.019941 lr=0.121023 [1,0]:INFO:root:Epoch[171] Batch[1200] Loss[3.387] [1,0]:INFO:root:Epoch[171] Rank[0] Batch[1200] rmse=0.019953 lr=0.120849 [1,0]:INFO:root:Epoch[171] Rank[0] Batch[1251] Time cost=398.91 Train-metric=0.019944 [1,0]:INFO:root:Epoch[171] Speed: 3211.32 samples/sec [1,0]:INFO:root:Epoch[172] Batch[100] Loss[2.630] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[100] rmse=0.019798 lr=0.120587 [1,0]:INFO:root:Epoch[172] Batch[200] Loss[2.516] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[200] rmse=0.019769 lr=0.120413 [1,0]:INFO:root:Epoch[172] Batch[300] Loss[5.311] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[300] rmse=0.019844 lr=0.120239 [1,0]:INFO:root:Epoch[172] Batch[400] Loss[3.260] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[400] rmse=0.019895 lr=0.120066 [1,0]:INFO:root:Epoch[172] Batch[500] Loss[2.578] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[500] rmse=0.019874 lr=0.119892 [1,0]:INFO:root:Epoch[172] Batch[600] Loss[4.999] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[600] rmse=0.019876 lr=0.119719 [1,0]:INFO:root:Epoch[172] Batch[700] Loss[2.613] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[700] rmse=0.019893 lr=0.119545 [1,0]:INFO:root:Epoch[172] Batch[800] Loss[2.819] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[800] rmse=0.019891 lr=0.119372 [1,0]:INFO:root:Epoch[172] Batch[900] Loss[2.603] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[900] rmse=0.019894 lr=0.119199 [1,0]:INFO:root:Epoch[172] Batch[1000] Loss[3.184] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[1000] rmse=0.019885 lr=0.119025 [1,0]:INFO:root:Epoch[172] Batch[1100] Loss[2.692] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[1100] rmse=0.019878 lr=0.118852 [1,0]:INFO:root:Epoch[172] Batch[1200] Loss[2.648] [1,0]:INFO:root:Epoch[172] Rank[0] Batch[1200] rmse=0.019872 lr=0.118679 [1,0]:INFO:root:Epoch[172] Rank[0] Batch[1251] Time cost=401.36 Train-metric=0.019881 [1,0]:INFO:root:Epoch[172] Speed: 3191.72 samples/sec [1,0]:INFO:root:Epoch[173] Batch[100] Loss[2.531] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[100] rmse=0.019803 lr=0.118418 [1,0]:INFO:root:Epoch[173] Batch[200] Loss[2.874] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[200] rmse=0.019904 lr=0.118245 [1,0]:INFO:root:Epoch[173] Batch[300] Loss[3.131] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[300] rmse=0.019854 lr=0.118072 [1,0]:INFO:root:Epoch[173] Batch[400] Loss[3.782] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[400] rmse=0.019914 lr=0.117900 [1,0]:INFO:root:Epoch[173] Batch[500] Loss[2.816] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[500] rmse=0.019910 lr=0.117727 [1,0]:INFO:root:Epoch[173] Batch[600] Loss[4.630] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[600] rmse=0.019907 lr=0.117554 [1,0]:INFO:root:Epoch[173] Batch[700] Loss[5.250] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[700] rmse=0.019884 lr=0.117382 [1,0]:INFO:root:Epoch[173] Batch[800] Loss[2.743] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[800] rmse=0.019883 lr=0.117209 [1,0]:INFO:root:Epoch[173] Batch[900] Loss[2.736] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[900] rmse=0.019903 lr=0.117037 [1,0]:INFO:root:Epoch[173] Batch[1000] Loss[2.899] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[1000] rmse=0.019908 lr=0.116865 [1,0]:INFO:root:Epoch[173] Batch[1100] Loss[3.230] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[1100] rmse=0.019918 lr=0.116693 [1,0]:INFO:root:Epoch[173] Batch[1200] Loss[4.559] [1,0]:INFO:root:Epoch[173] Rank[0] Batch[1200] rmse=0.019933 lr=0.116521 [1,0]:INFO:root:Epoch[173] Rank[0] Batch[1251] Time cost=402.18 Train-metric=0.019924 [1,0]:INFO:root:Epoch[173] Speed: 3185.22 samples/sec [1,0]:INFO:root:Epoch[174] Batch[100] Loss[2.680] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[100] rmse=0.019791 lr=0.116261 [1,0]:INFO:root:Epoch[174] Batch[200] Loss[2.783] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[200] rmse=0.019816 lr=0.116089 [1,0]:INFO:root:Epoch[174] Batch[300] Loss[2.376] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[300] rmse=0.019760 lr=0.115917 [1,0]:INFO:root:Epoch[174] Batch[400] Loss[2.681] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[400] rmse=0.019744 lr=0.115745 [1,0]:INFO:root:Epoch[174] Batch[500] Loss[2.570] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[500] rmse=0.019780 lr=0.115573 [1,0]:INFO:root:Epoch[174] Batch[600] Loss[4.721] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[600] rmse=0.019805 lr=0.115402 [1,0]:INFO:root:Epoch[174] Batch[700] Loss[2.842] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[700] rmse=0.019825 lr=0.115230 [1,0]:INFO:root:Epoch[174] Batch[800] Loss[4.467] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[800] rmse=0.019842 lr=0.115059 [1,0]:INFO:root:Epoch[174] Batch[900] Loss[3.430] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[900] rmse=0.019833 lr=0.114887 [1,0]:INFO:root:Epoch[174] Batch[1000] Loss[5.032] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[1000] rmse=0.019854 lr=0.114716 [1,0]:INFO:root:Epoch[174] Batch[1100] Loss[2.660] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[1100] rmse=0.019874 lr=0.114545 [1,0]:INFO:root:Epoch[174] Batch[1200] Loss[2.790] [1,0]:INFO:root:Epoch[174] Rank[0] Batch[1200] rmse=0.019891 lr=0.114374 [1,0]:INFO:root:Epoch[174] Rank[0] Batch[1251] Time cost=402.32 Train-metric=0.019886 [1,0]:INFO:root:Epoch[174] Speed: 3184.07 samples/sec [1,0]:INFO:root:Epoch[174] Rank[0] Validation-accuracy=0.669840 Validation-top_k_accuracy_5=0.880340 [1,0]:INFO:root:Epoch[175] Batch[100] Loss[2.652] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[100] rmse=0.019703 lr=0.114115 [1,0]:INFO:root:Epoch[175] Batch[200] Loss[2.311] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[200] rmse=0.019643 lr=0.113944 [1,0]:INFO:root:Epoch[175] Batch[300] Loss[2.852] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[300] rmse=0.019689 lr=0.113773 [1,0]:INFO:root:Epoch[175] Batch[400] Loss[2.993] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[400] rmse=0.019736 lr=0.113602 [1,0]:INFO:root:Epoch[175] Batch[500] Loss[4.791] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[500] rmse=0.019737 lr=0.113432 [1,0]:INFO:root:Epoch[175] Batch[600] Loss[2.588] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[600] rmse=0.019730 lr=0.113261 [1,0]:INFO:root:Epoch[175] Batch[700] Loss[2.692] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[700] rmse=0.019738 lr=0.113090 [1,0]:INFO:root:Epoch[175] Batch[800] Loss[3.464] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[800] rmse=0.019773 lr=0.112920 [1,0]:INFO:root:Epoch[175] Batch[900] Loss[2.773] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[900] rmse=0.019789 lr=0.112749 [1,0]:INFO:root:Epoch[175] Batch[1000] Loss[2.971] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[1000] rmse=0.019792 lr=0.112579 [1,0]:INFO:root:Epoch[175] Batch[1100] Loss[2.588] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[1100] rmse=0.019795 lr=0.112409 [1,0]:INFO:root:Epoch[175] Batch[1200] Loss[2.612] [1,0]:INFO:root:Epoch[175] Rank[0] Batch[1200] rmse=0.019810 lr=0.112239 [1,0]:INFO:root:Epoch[175] Rank[0] Batch[1251] Time cost=402.05 Train-metric=0.019806 [1,0]:INFO:root:Epoch[175] Speed: 3186.22 samples/sec [1,0]:INFO:root:Epoch[176] Batch[100] Loss[2.435] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[100] rmse=0.019924 lr=0.111982 [1,0]:INFO:root:Epoch[176] Batch[200] Loss[2.526] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[200] rmse=0.019841 lr=0.111812 [1,0]:INFO:root:Epoch[176] Batch[300] Loss[4.743] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[300] rmse=0.019850 lr=0.111642 [1,0]:INFO:root:Epoch[176] Batch[400] Loss[3.237] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[400] rmse=0.019828 lr=0.111472 [1,0]:INFO:root:Epoch[176] Batch[500] Loss[5.188] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[500] rmse=0.019807 lr=0.111302 [1,0]:INFO:root:Epoch[176] Batch[600] Loss[3.429] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[600] rmse=0.019846 lr=0.111132 [1,0]:INFO:root:Epoch[176] Batch[700] Loss[3.748] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[700] rmse=0.019846 lr=0.110963 [1,0]:INFO:root:Epoch[176] Batch[800] Loss[5.019] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[800] rmse=0.019863 lr=0.110793 [1,0]:INFO:root:Epoch[176] Batch[900] Loss[5.048] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[900] rmse=0.019859 lr=0.110624 [1,0]:INFO:root:Epoch[176] Batch[1000] Loss[3.069] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[1000] rmse=0.019852 lr=0.110454 [1,0]:INFO:root:Epoch[176] Batch[1100] Loss[3.071] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[1100] rmse=0.019844 lr=0.110285 [1,0]:INFO:root:Epoch[176] Batch[1200] Loss[3.147] [1,0]:INFO:root:Epoch[176] Rank[0] Batch[1200] rmse=0.019862 lr=0.110116 [1,0]:INFO:root:Epoch[176] Rank[0] Batch[1251] Time cost=400.02 Train-metric=0.019871 [1,0]:INFO:root:Epoch[176] Speed: 3202.36 samples/sec [1,0]:INFO:root:Epoch[177] Batch[100] Loss[2.764] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[100] rmse=0.019630 lr=0.109861 [1,0]:INFO:root:Epoch[177] Batch[200] Loss[2.612] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[200] rmse=0.019770 lr=0.109692 [1,0]:INFO:root:Epoch[177] Batch[300] Loss[4.360] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[300] rmse=0.019820 lr=0.109523 [1,0]:INFO:root:Epoch[177] Batch[400] Loss[2.681] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[400] rmse=0.019786 lr=0.109354 [1,0]:INFO:root:Epoch[177] Batch[500] Loss[2.591] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[500] rmse=0.019813 lr=0.109185 [1,0]:INFO:root:Epoch[177] Batch[600] Loss[5.038] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[600] rmse=0.019808 lr=0.109016 [1,0]:INFO:root:Epoch[177] Batch[700] Loss[2.389] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[700] rmse=0.019802 lr=0.108848 [1,0]:INFO:root:Epoch[177] Batch[800] Loss[2.522] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[800] rmse=0.019802 lr=0.108679 [1,0]:INFO:root:Epoch[177] Batch[900] Loss[2.968] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[900] rmse=0.019826 lr=0.108511 [1,0]:INFO:root:Epoch[177] Batch[1000] Loss[2.609] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[1000] rmse=0.019825 lr=0.108342 [1,0]:INFO:root:Epoch[177] Batch[1100] Loss[3.836] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[1100] rmse=0.019842 lr=0.108174 [1,0]:INFO:root:Epoch[177] Batch[1200] Loss[4.361] [1,0]:INFO:root:Epoch[177] Rank[0] Batch[1200] rmse=0.019838 lr=0.108006 [1,0]:INFO:root:Epoch[177] Rank[0] Batch[1251] Time cost=400.91 Train-metric=0.019835 [1,0]:INFO:root:Epoch[177] Speed: 3195.25 samples/sec [1,0]:INFO:root:Epoch[178] Batch[100] Loss[4.785] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[100] rmse=0.019715 lr=0.107752 [1,0]:INFO:root:Epoch[178] Batch[200] Loss[2.626] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[200] rmse=0.019699 lr=0.107584 [1,0]:INFO:root:Epoch[178] Batch[300] Loss[4.103] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[300] rmse=0.019670 lr=0.107416 [1,0]:INFO:root:Epoch[178] Batch[400] Loss[4.938] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[400] rmse=0.019651 lr=0.107248 [1,0]:INFO:root:Epoch[178] Batch[500] Loss[2.782] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[500] rmse=0.019696 lr=0.107081 [1,0]:INFO:root:Epoch[178] Batch[600] Loss[2.632] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[600] rmse=0.019710 lr=0.106913 [1,0]:INFO:root:Epoch[178] Batch[700] Loss[2.536] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[700] rmse=0.019729 lr=0.106745 [1,0]:INFO:root:Epoch[178] Batch[800] Loss[4.524] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[800] rmse=0.019753 lr=0.106578 [1,0]:INFO:root:Epoch[178] Batch[900] Loss[3.815] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[900] rmse=0.019768 lr=0.106411 [1,0]:INFO:root:Epoch[178] Batch[1000] Loss[2.473] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[1000] rmse=0.019769 lr=0.106243 [1,0]:INFO:root:Epoch[178] Batch[1100] Loss[2.571] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[1100] rmse=0.019790 lr=0.106076 [1,0]:INFO:root:Epoch[178] Batch[1200] Loss[4.631] [1,0]:INFO:root:Epoch[178] Rank[0] Batch[1200] rmse=0.019789 lr=0.105909 [1,0]:INFO:root:Epoch[178] Rank[0] Batch[1251] Time cost=399.04 Train-metric=0.019790 [1,0]:INFO:root:Epoch[178] Speed: 3210.25 samples/sec [1,0]:INFO:root:Epoch[179] Batch[100] Loss[2.453] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[100] rmse=0.019583 lr=0.105657 [1,0]:INFO:root:Epoch[179] Batch[200] Loss[2.813] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[200] rmse=0.019690 lr=0.105490 [1,0]:INFO:root:Epoch[179] Batch[300] Loss[2.675] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[300] rmse=0.019743 lr=0.105323 [1,0]:INFO:root:Epoch[179] Batch[400] Loss[2.925] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[400] rmse=0.019725 lr=0.105156 [1,0]:INFO:root:Epoch[179] Batch[500] Loss[2.477] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[500] rmse=0.019741 lr=0.104989 [1,0]:INFO:root:Epoch[179] Batch[600] Loss[2.522] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[600] rmse=0.019746 lr=0.104823 [1,0]:INFO:root:Epoch[179] Batch[700] Loss[3.501] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[700] rmse=0.019753 lr=0.104656 [1,0]:INFO:root:Epoch[179] Batch[800] Loss[2.500] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[800] rmse=0.019759 lr=0.104490 [1,0]:INFO:root:Epoch[179] Batch[900] Loss[4.839] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[900] rmse=0.019762 lr=0.104323 [1,0]:INFO:root:Epoch[179] Batch[1000] Loss[2.583] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[1000] rmse=0.019764 lr=0.104157 [1,0]:INFO:root:Epoch[179] Batch[1100] Loss[3.936] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[1100] rmse=0.019775 lr=0.103991 [1,0]:INFO:root:Epoch[179] Batch[1200] Loss[3.072] [1,0]:INFO:root:Epoch[179] Rank[0] Batch[1200] rmse=0.019783 lr=0.103825 [1,0]:INFO:root:Epoch[179] Rank[0] Batch[1251] Time cost=400.20 Train-metric=0.019793 [1,0]:INFO:root:Epoch[179] Speed: 3200.96 samples/sec [1,0]:INFO:root:Epoch[179] Rank[0] Validation-accuracy=0.677220 Validation-top_k_accuracy_5=0.880600 [1,0]:INFO:root:Epoch[180] Batch[100] Loss[2.526] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[100] rmse=0.019732 lr=0.103574 [1,0]:INFO:root:Epoch[180] Batch[200] Loss[2.628] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[200] rmse=0.019670 lr=0.103408 [1,0]:INFO:root:Epoch[180] Batch[300] Loss[2.595] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[300] rmse=0.019704 lr=0.103243 [1,0]:INFO:root:Epoch[180] Batch[400] Loss[3.280] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[400] rmse=0.019724 lr=0.103077 [1,0]:INFO:root:Epoch[180] Batch[500] Loss[4.102] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[500] rmse=0.019754 lr=0.102911 [1,0]:INFO:root:Epoch[180] Batch[600] Loss[3.379] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[600] rmse=0.019755 lr=0.102746 [1,0]:INFO:root:Epoch[180] Batch[700] Loss[2.551] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[700] rmse=0.019743 lr=0.102580 [1,0]:INFO:root:Epoch[180] Batch[800] Loss[4.468] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[800] rmse=0.019737 lr=0.102415 [1,0]:INFO:root:Epoch[180] Batch[900] Loss[4.049] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[900] rmse=0.019744 lr=0.102250 [1,0]:INFO:root:Epoch[180] Batch[1000] Loss[4.153] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[1000] rmse=0.019727 lr=0.102085 [1,0]:INFO:root:Epoch[180] Batch[1100] Loss[2.542] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[1100] rmse=0.019751 lr=0.101919 [1,0]:INFO:root:Epoch[180] Batch[1200] Loss[4.052] [1,0]:INFO:root:Epoch[180] Rank[0] Batch[1200] rmse=0.019747 lr=0.101754 [1,0]:INFO:root:Epoch[180] Rank[0] Batch[1251] Time cost=398.25 Train-metric=0.019749 [1,0]:INFO:root:Epoch[180] Speed: 3216.60 samples/sec [1,0]:INFO:root:Epoch[181] Batch[100] Loss[2.622] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[100] rmse=0.019692 lr=0.101505 [1,0]:INFO:root:Epoch[181] Batch[200] Loss[3.363] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[200] rmse=0.019692 lr=0.101341 [1,0]:INFO:root:Epoch[181] Batch[300] Loss[2.699] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[300] rmse=0.019670 lr=0.101176 [1,0]:INFO:root:Epoch[181] Batch[400] Loss[2.645] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[400] rmse=0.019649 lr=0.101011 [1,0]:INFO:root:Epoch[181] Batch[500] Loss[2.634] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[500] rmse=0.019670 lr=0.100847 [1,0]:INFO:root:Epoch[181] Batch[600] Loss[5.049] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[600] rmse=0.019676 lr=0.100682 [1,0]:INFO:root:Epoch[181] Batch[700] Loss[2.350] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[700] rmse=0.019671 lr=0.100518 [1,0]:INFO:root:Epoch[181] Batch[800] Loss[2.727] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[800] rmse=0.019668 lr=0.100354 [1,0]:INFO:root:Epoch[181] Batch[900] Loss[2.657] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[900] rmse=0.019692 lr=0.100190 [1,0]:INFO:root:Epoch[181] Batch[1000] Loss[4.876] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[1000] rmse=0.019715 lr=0.100026 [1,0]:INFO:root:Epoch[181] Batch[1100] Loss[4.580] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[1100] rmse=0.019724 lr=0.099862 [1,0]:INFO:root:Epoch[181] Batch[1200] Loss[4.721] [1,0]:INFO:root:Epoch[181] Rank[0] Batch[1200] rmse=0.019725 lr=0.099698 [1,0]:INFO:root:Epoch[181] Rank[0] Batch[1251] Time cost=399.16 Train-metric=0.019716 [1,0]:INFO:root:Epoch[181] Speed: 3209.27 samples/sec [1,0]:INFO:root:Epoch[182] Batch[100] Loss[2.742] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[100] rmse=0.019599 lr=0.099451 [1,0]:INFO:root:Epoch[182] Batch[200] Loss[5.080] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[200] rmse=0.019565 lr=0.099287 [1,0]:INFO:root:Epoch[182] Batch[300] Loss[2.847] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[300] rmse=0.019610 lr=0.099123 [1,0]:INFO:root:Epoch[182] Batch[400] Loss[4.859] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[400] rmse=0.019611 lr=0.098960 [1,0]:INFO:root:Epoch[182] Batch[500] Loss[2.471] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[500] rmse=0.019635 lr=0.098796 [1,0]:INFO:root:Epoch[182] Batch[600] Loss[2.528] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[600] rmse=0.019680 lr=0.098633 [1,0]:INFO:root:Epoch[182] Batch[700] Loss[3.794] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[700] rmse=0.019710 lr=0.098470 [1,0]:INFO:root:Epoch[182] Batch[800] Loss[2.696] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[800] rmse=0.019715 lr=0.098307 [1,0]:INFO:root:Epoch[182] Batch[900] Loss[2.828] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[900] rmse=0.019696 lr=0.098144 [1,0]:INFO:root:Epoch[182] Batch[1000] Loss[3.016] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[1000] rmse=0.019691 lr=0.097981 [1,0]:INFO:root:Epoch[182] Batch[1100] Loss[2.320] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[1100] rmse=0.019705 lr=0.097818 [1,0]:INFO:root:Epoch[182] Batch[1200] Loss[2.812] [1,0]:INFO:root:Epoch[182] Rank[0] Batch[1200] rmse=0.019700 lr=0.097655 [1,0]:INFO:root:Epoch[182] Rank[0] Batch[1251] Time cost=398.94 Train-metric=0.019697 [1,0]:INFO:root:Epoch[182] Speed: 3211.08 samples/sec [1,0]:INFO:root:Epoch[183] Batch[100] Loss[5.002] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[100] rmse=0.019642 lr=0.097410 [1,0]:INFO:root:Epoch[183] Batch[200] Loss[2.830] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[200] rmse=0.019562 lr=0.097247 [1,0]:INFO:root:Epoch[183] Batch[300] Loss[2.405] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[300] rmse=0.019580 lr=0.097085 [1,0]:INFO:root:Epoch[183] Batch[400] Loss[2.928] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[400] rmse=0.019585 lr=0.096922 [1,0]:INFO:root:Epoch[183] Batch[500] Loss[2.476] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[500] rmse=0.019614 lr=0.096760 [1,0]:INFO:root:Epoch[183] Batch[600] Loss[2.470] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[600] rmse=0.019612 lr=0.096598 [1,0]:INFO:root:Epoch[183] Batch[700] Loss[4.808] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[700] rmse=0.019626 lr=0.096436 [1,0]:INFO:root:Epoch[183] Batch[800] Loss[2.576] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[800] rmse=0.019616 lr=0.096274 [1,0]:INFO:root:Epoch[183] Batch[900] Loss[2.666] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[900] rmse=0.019637 lr=0.096112 [1,0]:INFO:root:Epoch[183] Batch[1000] Loss[4.919] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[1000] rmse=0.019655 lr=0.095950 [1,0]:INFO:root:Epoch[183] Batch[1100] Loss[3.399] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[1100] rmse=0.019671 lr=0.095789 [1,0]:INFO:root:Epoch[183] Batch[1200] Loss[3.953] [1,0]:INFO:root:Epoch[183] Rank[0] Batch[1200] rmse=0.019673 lr=0.095627 [1,0]:INFO:root:Epoch[183] Rank[0] Batch[1251] Time cost=399.27 Train-metric=0.019674 [1,0]:INFO:root:Epoch[183] Speed: 3208.44 samples/sec [1,0]:INFO:root:Epoch[184] Batch[100] Loss[2.384] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[100] rmse=0.019477 lr=0.095383 [1,0]:INFO:root:Epoch[184] Batch[200] Loss[2.314] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[200] rmse=0.019439 lr=0.095222 [1,0]:INFO:root:Epoch[184] Batch[300] Loss[2.611] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[300] rmse=0.019485 lr=0.095061 [1,0]:INFO:root:Epoch[184] Batch[400] Loss[4.652] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[400] rmse=0.019535 lr=0.094899 [1,0]:INFO:root:Epoch[184] Batch[500] Loss[2.979] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[500] rmse=0.019579 lr=0.094738 [1,0]:INFO:root:Epoch[184] Batch[600] Loss[3.448] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[600] rmse=0.019571 lr=0.094577 [1,0]:INFO:root:Epoch[184] Batch[700] Loss[2.401] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[700] rmse=0.019582 lr=0.094417 [1,0]:INFO:root:Epoch[184] Batch[800] Loss[3.187] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[800] rmse=0.019601 lr=0.094256 [1,0]:INFO:root:Epoch[184] Batch[900] Loss[2.420] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[900] rmse=0.019611 lr=0.094095 [1,0]:INFO:root:Epoch[184] Batch[1000] Loss[3.447] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[1000] rmse=0.019629 lr=0.093934 [1,0]:INFO:root:Epoch[184] Batch[1100] Loss[3.678] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[1100] rmse=0.019613 lr=0.093774 [1,0]:INFO:root:Epoch[184] Batch[1200] Loss[2.431] [1,0]:INFO:root:Epoch[184] Rank[0] Batch[1200] rmse=0.019614 lr=0.093614 [1,0]:INFO:root:Epoch[184] Rank[0] Batch[1251] Time cost=399.48 Train-metric=0.019616 [1,0]:INFO:root:Epoch[184] Speed: 3206.71 samples/sec [1,0]:INFO:root:Epoch[184] Rank[0] Validation-accuracy=0.683280 Validation-top_k_accuracy_5=0.886280 [1,0]:INFO:root:Epoch[185] Batch[100] Loss[2.622] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[100] rmse=0.019698 lr=0.093371 [1,0]:INFO:root:Epoch[185] Batch[200] Loss[4.981] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[200] rmse=0.019691 lr=0.093211 [1,0]:INFO:root:Epoch[185] Batch[300] Loss[2.476] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[300] rmse=0.019690 lr=0.093051 [1,0]:INFO:root:Epoch[185] Batch[400] Loss[2.759] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[400] rmse=0.019676 lr=0.092891 [1,0]:INFO:root:Epoch[185] Batch[500] Loss[2.411] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[500] rmse=0.019653 lr=0.092731 [1,0]:INFO:root:Epoch[185] Batch[600] Loss[3.478] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[600] rmse=0.019675 lr=0.092572 [1,0]:INFO:root:Epoch[185] Batch[700] Loss[2.735] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[700] rmse=0.019680 lr=0.092412 [1,0]:INFO:root:Epoch[185] Batch[800] Loss[3.261] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[800] rmse=0.019679 lr=0.092252 [1,0]:INFO:root:Epoch[185] Batch[900] Loss[2.575] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[900] rmse=0.019680 lr=0.092093 [1,0]:INFO:root:Epoch[185] Batch[1000] Loss[3.667] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[1000] rmse=0.019680 lr=0.091933 [1,0]:INFO:root:Epoch[185] Batch[1100] Loss[4.797] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[1100] rmse=0.019677 lr=0.091774 [1,0]:INFO:root:Epoch[185] Batch[1200] Loss[2.723] [1,0]:INFO:root:Epoch[185] Rank[0] Batch[1200] rmse=0.019672 lr=0.091615 [1,0]:INFO:root:Epoch[185] Rank[0] Batch[1251] Time cost=398.62 Train-metric=0.019669 [1,0]:INFO:root:Epoch[185] Speed: 3213.67 samples/sec [1,0]:INFO:root:Epoch[186] Batch[100] Loss[2.652] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[100] rmse=0.019692 lr=0.091375 [1,0]:INFO:root:Epoch[186] Batch[200] Loss[2.856] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[200] rmse=0.019569 lr=0.091216 [1,0]:INFO:root:Epoch[186] Batch[300] Loss[4.685] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[300] rmse=0.019559 lr=0.091057 [1,0]:INFO:root:Epoch[186] Batch[400] Loss[2.675] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[400] rmse=0.019567 lr=0.090898 [1,0]:INFO:root:Epoch[186] Batch[500] Loss[4.075] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[500] rmse=0.019529 lr=0.090739 [1,0]:INFO:root:Epoch[186] Batch[600] Loss[5.139] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[600] rmse=0.019528 lr=0.090581 [1,0]:INFO:root:Epoch[186] Batch[700] Loss[2.626] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[700] rmse=0.019548 lr=0.090422 [1,0]:INFO:root:Epoch[186] Batch[800] Loss[3.088] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[800] rmse=0.019525 lr=0.090264 [1,0]:INFO:root:Epoch[186] Batch[900] Loss[2.967] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[900] rmse=0.019537 lr=0.090106 [1,0]:INFO:root:Epoch[186] Batch[1000] Loss[2.564] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[1000] rmse=0.019554 lr=0.089948 [1,0]:INFO:root:Epoch[186] Batch[1100] Loss[4.287] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[1100] rmse=0.019560 lr=0.089789 [1,0]:INFO:root:Epoch[186] Batch[1200] Loss[2.711] [1,0]:INFO:root:Epoch[186] Rank[0] Batch[1200] rmse=0.019574 lr=0.089631 [1,0]:INFO:root:Epoch[186] Rank[0] Batch[1251] Time cost=399.26 Train-metric=0.019583 [1,0]:INFO:root:Epoch[186] Speed: 3208.50 samples/sec [1,0]:INFO:root:Epoch[187] Batch[100] Loss[4.557] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[100] rmse=0.019580 lr=0.089393 [1,0]:INFO:root:Epoch[187] Batch[200] Loss[3.309] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[200] rmse=0.019593 lr=0.089235 [1,0]:INFO:root:Epoch[187] Batch[300] Loss[2.629] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[300] rmse=0.019564 lr=0.089078 [1,0]:INFO:root:Epoch[187] Batch[400] Loss[2.401] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[400] rmse=0.019562 lr=0.088920 [1,0]:INFO:root:Epoch[187] Batch[500] Loss[3.028] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[500] rmse=0.019581 lr=0.088763 [1,0]:INFO:root:Epoch[187] Batch[600] Loss[4.766] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[600] rmse=0.019592 lr=0.088605 [1,0]:INFO:root:Epoch[187] Batch[700] Loss[2.442] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[700] rmse=0.019600 lr=0.088448 [1,0]:INFO:root:Epoch[187] Batch[800] Loss[2.544] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[800] rmse=0.019613 lr=0.088291 [1,0]:INFO:root:Epoch[187] Batch[900] Loss[4.682] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[900] rmse=0.019601 lr=0.088134 [1,0]:INFO:root:Epoch[187] Batch[1000] Loss[3.086] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[1000] rmse=0.019611 lr=0.087977 [1,0]:INFO:root:Epoch[187] Batch[1100] Loss[2.760] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[1100] rmse=0.019616 lr=0.087820 [1,0]:INFO:root:Epoch[187] Batch[1200] Loss[4.116] [1,0]:INFO:root:Epoch[187] Rank[0] Batch[1200] rmse=0.019605 lr=0.087664 [1,0]:INFO:root:Epoch[187] Rank[0] Batch[1251] Time cost=399.42 Train-metric=0.019603 [1,0]:INFO:root:Epoch[187] Speed: 3207.18 samples/sec [1,0]:INFO:root:Epoch[188] Batch[100] Loss[4.732] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[100] rmse=0.019445 lr=0.087427 [1,0]:INFO:root:Epoch[188] Batch[200] Loss[2.767] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[200] rmse=0.019503 lr=0.087271 [1,0]:INFO:root:Epoch[188] Batch[300] Loss[2.952] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[300] rmse=0.019482 lr=0.087114 [1,0]:INFO:root:Epoch[188] Batch[400] Loss[2.681] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[400] rmse=0.019486 lr=0.086958 [1,0]:INFO:root:Epoch[188] Batch[500] Loss[3.114] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[500] rmse=0.019514 lr=0.086802 [1,0]:INFO:root:Epoch[188] Batch[600] Loss[4.881] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[600] rmse=0.019523 lr=0.086646 [1,0]:INFO:root:Epoch[188] Batch[700] Loss[2.651] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[700] rmse=0.019533 lr=0.086490 [1,0]:INFO:root:Epoch[188] Batch[800] Loss[2.737] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[800] rmse=0.019551 lr=0.086334 [1,0]:INFO:root:Epoch[188] Batch[900] Loss[3.804] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[900] rmse=0.019573 lr=0.086178 [1,0]:INFO:root:Epoch[188] Batch[1000] Loss[2.885] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[1000] rmse=0.019559 lr=0.086022 [1,0]:INFO:root:Epoch[188] Batch[1100] Loss[3.014] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[1100] rmse=0.019561 lr=0.085867 [1,0]:INFO:root:Epoch[188] Batch[1200] Loss[2.832] [1,0]:INFO:root:Epoch[188] Rank[0] Batch[1200] rmse=0.019563 lr=0.085711 [1,0]:INFO:root:Epoch[188] Rank[0] Batch[1251] Time cost=400.32 Train-metric=0.019572 [1,0]:INFO:root:Epoch[188] Speed: 3200.02 samples/sec [1,0]:INFO:root:Epoch[189] Batch[100] Loss[2.659] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[100] rmse=0.019396 lr=0.085477 [1,0]:INFO:root:Epoch[189] Batch[200] Loss[4.683] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[200] rmse=0.019354 lr=0.085322 [1,0]:INFO:root:Epoch[189] Batch[300] Loss[2.444] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[300] rmse=0.019408 lr=0.085166 [1,0]:INFO:root:Epoch[189] Batch[400] Loss[4.681] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[400] rmse=0.019425 lr=0.085012 [1,0]:INFO:root:Epoch[189] Batch[500] Loss[4.784] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[500] rmse=0.019447 lr=0.084857 [1,0]:INFO:root:Epoch[189] Batch[600] Loss[4.084] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[600] rmse=0.019448 lr=0.084702 [1,0]:INFO:root:Epoch[189] Batch[700] Loss[3.829] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[700] rmse=0.019461 lr=0.084547 [1,0]:INFO:root:Epoch[189] Batch[800] Loss[2.469] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[800] rmse=0.019475 lr=0.084393 [1,0]:INFO:root:Epoch[189] Batch[900] Loss[2.441] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[900] rmse=0.019488 lr=0.084238 [1,0]:INFO:root:Epoch[189] Batch[1000] Loss[2.474] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[1000] rmse=0.019472 lr=0.084084 [1,0]:INFO:root:Epoch[189] Batch[1100] Loss[5.087] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[1100] rmse=0.019493 lr=0.083929 [1,0]:INFO:root:Epoch[189] Batch[1200] Loss[2.360] [1,0]:INFO:root:Epoch[189] Rank[0] Batch[1200] rmse=0.019498 lr=0.083775 [1,0]:INFO:root:Epoch[189] Rank[0] Batch[1251] Time cost=399.63 Train-metric=0.019501 [1,0]:INFO:root:Epoch[189] Speed: 3205.50 samples/sec [1,0]:INFO:root:Epoch[189] Rank[0] Validation-accuracy=0.685360 Validation-top_k_accuracy_5=0.887720 [1,0]:INFO:root:Epoch[190] Batch[100] Loss[2.561] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[100] rmse=0.019361 lr=0.083543 [1,0]:INFO:root:Epoch[190] Batch[200] Loss[2.671] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[200] rmse=0.019344 lr=0.083389 [1,0]:INFO:root:Epoch[190] Batch[300] Loss[3.975] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[300] rmse=0.019376 lr=0.083235 [1,0]:INFO:root:Epoch[190] Batch[400] Loss[4.282] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[400] rmse=0.019408 lr=0.083081 [1,0]:INFO:root:Epoch[190] Batch[500] Loss[3.060] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[500] rmse=0.019403 lr=0.082928 [1,0]:INFO:root:Epoch[190] Batch[600] Loss[3.314] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[600] rmse=0.019463 lr=0.082774 [1,0]:INFO:root:Epoch[190] Batch[700] Loss[2.451] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[700] rmse=0.019469 lr=0.082621 [1,0]:INFO:root:Epoch[190] Batch[800] Loss[4.566] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[800] rmse=0.019472 lr=0.082467 [1,0]:INFO:root:Epoch[190] Batch[900] Loss[4.436] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[900] rmse=0.019481 lr=0.082314 [1,0]:INFO:root:Epoch[190] Batch[1000] Loss[2.775] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[1000] rmse=0.019486 lr=0.082161 [1,0]:INFO:root:Epoch[190] Batch[1100] Loss[2.787] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[1100] rmse=0.019500 lr=0.082008 [1,0]:INFO:root:Epoch[190] Batch[1200] Loss[4.274] [1,0]:INFO:root:Epoch[190] Rank[0] Batch[1200] rmse=0.019507 lr=0.081855 [1,0]:INFO:root:Epoch[190] Rank[0] Batch[1251] Time cost=398.79 Train-metric=0.019514 [1,0]:INFO:root:Epoch[190] Speed: 3212.24 samples/sec [1,0]:INFO:root:Epoch[191] Batch[100] Loss[2.539] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[100] rmse=0.019310 lr=0.081625 [1,0]:INFO:root:Epoch[191] Batch[200] Loss[2.564] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[200] rmse=0.019424 lr=0.081472 [1,0]:INFO:root:Epoch[191] Batch[300] Loss[2.723] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[300] rmse=0.019444 lr=0.081320 [1,0]:INFO:root:Epoch[191] Batch[400] Loss[2.307] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[400] rmse=0.019432 lr=0.081167 [1,0]:INFO:root:Epoch[191] Batch[500] Loss[2.934] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[500] rmse=0.019417 lr=0.081015 [1,0]:INFO:root:Epoch[191] Batch[600] Loss[2.872] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[600] rmse=0.019423 lr=0.080863 [1,0]:INFO:root:Epoch[191] Batch[700] Loss[2.525] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[700] rmse=0.019450 lr=0.080711 [1,0]:INFO:root:Epoch[191] Batch[800] Loss[2.609] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[800] rmse=0.019453 lr=0.080559 [1,0]:INFO:root:Epoch[191] Batch[900] Loss[2.304] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[900] rmse=0.019468 lr=0.080407 [1,0]:INFO:root:Epoch[191] Batch[1000] Loss[4.266] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[1000] rmse=0.019472 lr=0.080255 [1,0]:INFO:root:Epoch[191] Batch[1100] Loss[2.684] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[1100] rmse=0.019465 lr=0.080104 [1,0]:INFO:root:Epoch[191] Batch[1200] Loss[2.541] [1,0]:INFO:root:Epoch[191] Rank[0] Batch[1200] rmse=0.019479 lr=0.079952 [1,0]:INFO:root:Epoch[191] Rank[0] Batch[1251] Time cost=399.33 Train-metric=0.019487 [1,0]:INFO:root:Epoch[191] Speed: 3207.92 samples/sec [1,0]:INFO:root:Epoch[192] Batch[100] Loss[2.465] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[100] rmse=0.019266 lr=0.079724 [1,0]:INFO:root:Epoch[192] Batch[200] Loss[2.682] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[200] rmse=0.019386 lr=0.079572 [1,0]:INFO:root:Epoch[192] Batch[300] Loss[2.647] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[300] rmse=0.019394 lr=0.079421 [1,0]:INFO:root:Epoch[192] Batch[400] Loss[2.368] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[400] rmse=0.019453 lr=0.079270 [1,0]:INFO:root:Epoch[192] Batch[500] Loss[2.542] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[500] rmse=0.019440 lr=0.079119 [1,2]:[ip-172-31-29-212][[55333,1],2][btl_tcp.c:559:mca_btl_tcp_recv_blocking] recv(116) failed: Connection reset by peer (104) [1,0]:INFO:root:Epoch[192] Batch[600] Loss[4.542] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[600] rmse=0.019413 lr=0.078968 [1,0]:INFO:root:Epoch[192] Batch[700] Loss[3.157] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[700] rmse=0.019420 lr=0.078818 [1,0]:INFO:root:Epoch[192] Batch[800] Loss[4.956] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[800] rmse=0.019442 lr=0.078667 [1,0]:INFO:root:Epoch[192] Batch[900] Loss[4.319] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[900] rmse=0.019437 lr=0.078517 [1,0]:INFO:root:Epoch[192] Batch[1000] Loss[4.140] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[1000] rmse=0.019429 lr=0.078366 [1,0]:INFO:root:Epoch[192] Batch[1100] Loss[4.664] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[1100] rmse=0.019433 lr=0.078216 [1,0]:INFO:root:Epoch[192] Batch[1200] Loss[2.564] [1,0]:INFO:root:Epoch[192] Rank[0] Batch[1200] rmse=0.019437 lr=0.078066 [1,0]:INFO:root:Epoch[192] Rank[0] Batch[1251] Time cost=398.52 Train-metric=0.019438 [1,0]:INFO:root:Epoch[192] Speed: 3214.47 samples/sec [1,0]:INFO:root:Epoch[193] Batch[100] Loss[2.590] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[100] rmse=0.019240 lr=0.077839 [1,0]:INFO:root:Epoch[193] Batch[200] Loss[2.512] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[200] rmse=0.019280 lr=0.077689 [1,0]:INFO:root:Epoch[193] Batch[300] Loss[2.602] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[300] rmse=0.019364 lr=0.077540 [1,0]:INFO:root:Epoch[193] Batch[400] Loss[4.966] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[400] rmse=0.019381 lr=0.077390 [1,0]:INFO:root:Epoch[193] Batch[500] Loss[4.897] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[500] rmse=0.019376 lr=0.077240 [1,0]:INFO:root:Epoch[193] Batch[600] Loss[3.420] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[600] rmse=0.019372 lr=0.077091 [1,0]:INFO:root:Epoch[193] Batch[700] Loss[2.587] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[700] rmse=0.019384 lr=0.076942 [1,0]:INFO:root:Epoch[193] Batch[800] Loss[5.273] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[800] rmse=0.019403 lr=0.076792 [1,0]:INFO:root:Epoch[193] Batch[900] Loss[2.638] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[900] rmse=0.019399 lr=0.076643 [1,0]:INFO:root:Epoch[193] Batch[1000] Loss[3.403] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[1000] rmse=0.019418 lr=0.076494 [1,0]:INFO:root:Epoch[193] Batch[1100] Loss[2.616] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[1100] rmse=0.019415 lr=0.076345 [1,0]:INFO:root:Epoch[193] Batch[1200] Loss[3.782] [1,0]:INFO:root:Epoch[193] Rank[0] Batch[1200] rmse=0.019404 lr=0.076196 [1,0]:INFO:root:Epoch[193] Rank[0] Batch[1251] Time cost=398.90 Train-metric=0.019417 [1,0]:INFO:root:Epoch[193] Speed: 3211.39 samples/sec [1,0]:INFO:root:Epoch[194] Batch[100] Loss[4.145] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[100] rmse=0.019385 lr=0.075972 [1,0]:INFO:root:Epoch[194] Batch[200] Loss[2.343] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[200] rmse=0.019352 lr=0.075823 [1,0]:INFO:root:Epoch[194] Batch[300] Loss[2.595] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[300] rmse=0.019370 lr=0.075675 [1,0]:INFO:root:Epoch[194] Batch[400] Loss[2.529] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[400] rmse=0.019392 lr=0.075527 [1,0]:INFO:root:Epoch[194] Batch[500] Loss[2.564] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[500] rmse=0.019392 lr=0.075379 [1,0]:INFO:root:Epoch[194] Batch[600] Loss[4.069] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[600] rmse=0.019422 lr=0.075231 [1,0]:INFO:root:Epoch[194] Batch[700] Loss[3.533] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[700] rmse=0.019423 lr=0.075083 [1,0]:INFO:root:Epoch[194] Batch[800] Loss[2.485] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[800] rmse=0.019427 lr=0.074935 [1,0]:INFO:root:Epoch[194] Batch[900] Loss[2.507] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[900] rmse=0.019414 lr=0.074787 [1,0]:INFO:root:Epoch[194] Batch[1000] Loss[2.988] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[1000] rmse=0.019424 lr=0.074639 [1,0]:INFO:root:Epoch[194] Batch[1100] Loss[2.484] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[1100] rmse=0.019438 lr=0.074492 [1,0]:INFO:root:Epoch[194] Batch[1200] Loss[3.202] [1,0]:INFO:root:Epoch[194] Rank[0] Batch[1200] rmse=0.019434 lr=0.074345 [1,0]:INFO:root:Epoch[194] Rank[0] Batch[1251] Time cost=398.74 Train-metric=0.019440 [1,0]:INFO:root:Epoch[194] Speed: 3212.65 samples/sec [1,0]:INFO:root:Epoch[194] Rank[0] Validation-accuracy=0.694700 Validation-top_k_accuracy_5=0.894060 [1,0]:INFO:root:Epoch[195] Batch[100] Loss[4.954] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[100] rmse=0.019319 lr=0.074122 [1,0]:INFO:root:Epoch[195] Batch[200] Loss[2.335] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[200] rmse=0.019321 lr=0.073975 [1,0]:INFO:root:Epoch[195] Batch[300] Loss[4.183] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[300] rmse=0.019301 lr=0.073828 [1,0]:INFO:root:Epoch[195] Batch[400] Loss[2.412] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[400] rmse=0.019350 lr=0.073681 [1,0]:INFO:root:Epoch[195] Batch[500] Loss[2.840] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[500] rmse=0.019338 lr=0.073534 [1,0]:INFO:root:Epoch[195] Batch[600] Loss[2.473] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[600] rmse=0.019373 lr=0.073388 [1,0]:INFO:root:Epoch[195] Batch[700] Loss[4.279] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[700] rmse=0.019376 lr=0.073241 [1,0]:INFO:root:Epoch[195] Batch[800] Loss[3.362] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[800] rmse=0.019383 lr=0.073095 [1,0]:INFO:root:Epoch[195] Batch[900] Loss[3.538] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[900] rmse=0.019384 lr=0.072948 [1,0]:INFO:root:Epoch[195] Batch[1000] Loss[2.687] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[1000] rmse=0.019403 lr=0.072802 [1,0]:INFO:root:Epoch[195] Batch[1100] Loss[2.312] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[1100] rmse=0.019407 lr=0.072656 [1,0]:INFO:root:Epoch[195] Batch[1200] Loss[3.666] [1,0]:INFO:root:Epoch[195] Rank[0] Batch[1200] rmse=0.019412 lr=0.072510 [1,0]:INFO:root:Epoch[195] Rank[0] Batch[1251] Time cost=398.65 Train-metric=0.019418 [1,0]:INFO:root:Epoch[195] Speed: 3213.37 samples/sec [1,0]:INFO:root:Epoch[196] Batch[100] Loss[2.546] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[100] rmse=0.019294 lr=0.072290 [1,0]:INFO:root:Epoch[196] Batch[200] Loss[2.595] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[200] rmse=0.019302 lr=0.072144 [1,0]:INFO:root:Epoch[196] Batch[300] Loss[2.867] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[300] rmse=0.019372 lr=0.071999 [1,0]:INFO:root:Epoch[196] Batch[400] Loss[4.615] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[400] rmse=0.019343 lr=0.071853 [1,0]:INFO:root:Epoch[196] Batch[500] Loss[2.675] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[500] rmse=0.019321 lr=0.071708 [1,0]:INFO:root:Epoch[196] Batch[600] Loss[2.897] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[600] rmse=0.019333 lr=0.071563 [1,0]:INFO:root:Epoch[196] Batch[700] Loss[3.235] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[700] rmse=0.019333 lr=0.071418 [1,0]:INFO:root:Epoch[196] Batch[800] Loss[2.357] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[800] rmse=0.019326 lr=0.071273 [1,0]:INFO:root:Epoch[196] Batch[900] Loss[2.816] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[900] rmse=0.019329 lr=0.071128 [1,0]:INFO:root:Epoch[196] Batch[1000] Loss[2.453] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[1000] rmse=0.019339 lr=0.070983 [1,0]:INFO:root:Epoch[196] Batch[1100] Loss[5.062] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[1100] rmse=0.019357 lr=0.070838 [1,0]:INFO:root:Epoch[196] Batch[1200] Loss[2.772] [1,0]:INFO:root:Epoch[196] Rank[0] Batch[1200] rmse=0.019377 lr=0.070694 [1,0]:INFO:root:Epoch[196] Rank[0] Batch[1251] Time cost=398.91 Train-metric=0.019373 [1,0]:INFO:root:Epoch[196] Speed: 3211.31 samples/sec [1,0]:INFO:root:Epoch[197] Batch[100] Loss[2.466] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[100] rmse=0.019284 lr=0.070476 [1,0]:INFO:root:Epoch[197] Batch[200] Loss[3.091] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[200] rmse=0.019273 lr=0.070332 [1,0]:INFO:root:Epoch[197] Batch[300] Loss[2.371] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[300] rmse=0.019286 lr=0.070187 [1,0]:INFO:root:Epoch[197] Batch[400] Loss[4.283] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[400] rmse=0.019345 lr=0.070043 [1,0]:INFO:root:Epoch[197] Batch[500] Loss[2.529] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[500] rmse=0.019329 lr=0.069900 [1,0]:INFO:root:Epoch[197] Batch[600] Loss[2.731] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[600] rmse=0.019333 lr=0.069756 [1,0]:INFO:root:Epoch[197] Batch[700] Loss[2.612] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[700] rmse=0.019332 lr=0.069612 [1,0]:INFO:root:Epoch[197] Batch[800] Loss[3.058] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[800] rmse=0.019342 lr=0.069469 [1,0]:INFO:root:Epoch[197] Batch[900] Loss[3.293] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[900] rmse=0.019331 lr=0.069325 [1,0]:INFO:root:Epoch[197] Batch[1000] Loss[2.630] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[1000] rmse=0.019339 lr=0.069182 [1,0]:INFO:root:Epoch[197] Batch[1100] Loss[4.400] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[1100] rmse=0.019346 lr=0.069039 [1,0]:INFO:root:Epoch[197] Batch[1200] Loss[2.890] [1,0]:INFO:root:Epoch[197] Rank[0] Batch[1200] rmse=0.019334 lr=0.068896 [1,0]:INFO:root:Epoch[197] Rank[0] Batch[1251] Time cost=399.07 Train-metric=0.019336 [1,0]:INFO:root:Epoch[197] Speed: 3210.03 samples/sec [1,0]:INFO:root:Epoch[198] Batch[100] Loss[4.309] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[100] rmse=0.019157 lr=0.068680 [1,0]:INFO:root:Epoch[198] Batch[200] Loss[2.738] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[200] rmse=0.019191 lr=0.068537 [1,0]:INFO:root:Epoch[198] Batch[300] Loss[2.336] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[300] rmse=0.019214 lr=0.068394 [1,0]:INFO:root:Epoch[198] Batch[400] Loss[4.716] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[400] rmse=0.019225 lr=0.068252 [1,0]:INFO:root:Epoch[198] Batch[500] Loss[2.717] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[500] rmse=0.019239 lr=0.068109 [1,0]:INFO:root:Epoch[198] Batch[600] Loss[2.987] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[600] rmse=0.019267 lr=0.067967 [1,0]:INFO:root:Epoch[198] Batch[700] Loss[2.111] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[700] rmse=0.019261 lr=0.067825 [1,0]:INFO:root:Epoch[198] Batch[800] Loss[2.530] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[800] rmse=0.019284 lr=0.067683 [1,0]:INFO:root:Epoch[198] Batch[900] Loss[3.174] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[900] rmse=0.019300 lr=0.067541 [1,0]:INFO:root:Epoch[198] Batch[1000] Loss[3.435] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[1000] rmse=0.019317 lr=0.067399 [1,0]:INFO:root:Epoch[198] Batch[1100] Loss[3.687] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[1100] rmse=0.019339 lr=0.067257 [1,0]:INFO:root:Epoch[198] Batch[1200] Loss[2.418] [1,0]:INFO:root:Epoch[198] Rank[0] Batch[1200] rmse=0.019346 lr=0.067116 [1,0]:INFO:root:Epoch[198] Rank[0] Batch[1251] Time cost=401.20 Train-metric=0.019339 [1,0]:INFO:root:Epoch[198] Speed: 3193.02 samples/sec [1,0]:INFO:root:Epoch[199] Batch[100] Loss[4.314] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[100] rmse=0.019321 lr=0.066902 [1,0]:INFO:root:Epoch[199] Batch[200] Loss[2.261] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[200] rmse=0.019269 lr=0.066761 [1,0]:INFO:root:Epoch[199] Batch[300] Loss[2.401] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[300] rmse=0.019262 lr=0.066620 [1,0]:INFO:root:Epoch[199] Batch[400] Loss[3.011] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[400] rmse=0.019222 lr=0.066479 [1,0]:INFO:root:Epoch[199] Batch[500] Loss[2.949] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[500] rmse=0.019225 lr=0.066338 [1,0]:INFO:root:Epoch[199] Batch[600] Loss[2.534] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[600] rmse=0.019224 lr=0.066197 [1,0]:INFO:root:Epoch[199] Batch[700] Loss[4.578] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[700] rmse=0.019243 lr=0.066056 [1,0]:INFO:root:Epoch[199] Batch[800] Loss[2.554] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[800] rmse=0.019240 lr=0.065916 [1,0]:INFO:root:Epoch[199] Batch[900] Loss[2.421] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[900] rmse=0.019253 lr=0.065775 [1,0]:INFO:root:Epoch[199] Batch[1000] Loss[3.723] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[1000] rmse=0.019256 lr=0.065635 [1,0]:INFO:root:Epoch[199] Batch[1100] Loss[3.081] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[1100] rmse=0.019267 lr=0.065495 [1,0]:INFO:root:Epoch[199] Batch[1200] Loss[2.621] [1,0]:INFO:root:Epoch[199] Rank[0] Batch[1200] rmse=0.019271 lr=0.065355 [1,0]:INFO:root:Epoch[199] Rank[0] Batch[1251] Time cost=401.49 Train-metric=0.019277 [1,0]:INFO:root:Epoch[199] Speed: 3190.66 samples/sec [1,0]:INFO:root:Epoch[199] Rank[0] Validation-accuracy=0.698140 Validation-top_k_accuracy_5=0.894400 [1,0]:INFO:root:Epoch[200] Batch[100] Loss[3.660] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[100] rmse=0.019037 lr=0.065143 [1,0]:INFO:root:Epoch[200] Batch[200] Loss[2.923] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[200] rmse=0.019170 lr=0.065003 [1,0]:INFO:root:Epoch[200] Batch[300] Loss[4.595] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[300] rmse=0.019231 lr=0.064864 [1,0]:INFO:root:Epoch[200] Batch[400] Loss[2.820] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[400] rmse=0.019229 lr=0.064724 [1,0]:INFO:root:Epoch[200] Batch[500] Loss[2.583] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[500] rmse=0.019229 lr=0.064585 [1,0]:INFO:root:Epoch[200] Batch[600] Loss[4.580] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[600] rmse=0.019247 lr=0.064445 [1,0]:INFO:root:Epoch[200] Batch[700] Loss[2.668] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[700] rmse=0.019259 lr=0.064306 [1,0]:INFO:root:Epoch[200] Batch[800] Loss[2.507] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[800] rmse=0.019246 lr=0.064167 [1,0]:INFO:root:Epoch[200] Batch[900] Loss[2.769] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[900] rmse=0.019242 lr=0.064028 [1,0]:INFO:root:Epoch[200] Batch[1000] Loss[2.520] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[1000] rmse=0.019236 lr=0.063889 [1,0]:INFO:root:Epoch[200] Batch[1100] Loss[3.217] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[1100] rmse=0.019259 lr=0.063751 [1,0]:INFO:root:Epoch[200] Batch[1200] Loss[2.475] [1,0]:INFO:root:Epoch[200] Rank[0] Batch[1200] rmse=0.019259 lr=0.063612 [1,0]:INFO:root:Epoch[200] Rank[0] Batch[1251] Time cost=401.57 Train-metric=0.019256 [1,0]:INFO:root:Epoch[200] Speed: 3190.02 samples/sec [1,0]:INFO:root:Epoch[201] Batch[100] Loss[5.077] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[100] rmse=0.019120 lr=0.063403 [1,0]:INFO:root:Epoch[201] Batch[200] Loss[2.344] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[200] rmse=0.019160 lr=0.063265 [1,0]:INFO:root:Epoch[201] Batch[300] Loss[2.550] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[300] rmse=0.019141 lr=0.063127 [1,0]:INFO:root:Epoch[201] Batch[400] Loss[3.036] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[400] rmse=0.019147 lr=0.062989 [1,0]:INFO:root:Epoch[201] Batch[500] Loss[4.454] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[500] rmse=0.019158 lr=0.062851 [1,0]:INFO:root:Epoch[201] Batch[600] Loss[5.054] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[600] rmse=0.019172 lr=0.062713 [1,0]:INFO:root:Epoch[201] Batch[700] Loss[4.118] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[700] rmse=0.019181 lr=0.062575 [1,0]:INFO:root:Epoch[201] Batch[800] Loss[2.780] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[800] rmse=0.019183 lr=0.062438 [1,0]:INFO:root:Epoch[201] Batch[900] Loss[2.433] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[900] rmse=0.019196 lr=0.062300 [1,0]:INFO:root:Epoch[201] Batch[1000] Loss[2.607] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[1000] rmse=0.019195 lr=0.062163 [1,0]:INFO:root:Epoch[201] Batch[1100] Loss[2.214] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[1100] rmse=0.019196 lr=0.062026 [1,0]:INFO:root:Epoch[201] Batch[1200] Loss[4.973] [1,0]:INFO:root:Epoch[201] Rank[0] Batch[1200] rmse=0.019191 lr=0.061889 [1,0]:INFO:root:Epoch[201] Rank[0] Batch[1251] Time cost=403.37 Train-metric=0.019193 [1,0]:INFO:root:Epoch[201] Speed: 3175.83 samples/sec [1,0]:INFO:root:Epoch[202] Batch[100] Loss[4.071] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[100] rmse=0.019184 lr=0.061682 [1,0]:INFO:root:Epoch[202] Batch[200] Loss[2.585] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[200] rmse=0.019200 lr=0.061546 [1,0]:INFO:root:Epoch[202] Batch[300] Loss[3.013] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[300] rmse=0.019154 lr=0.061409 [1,0]:INFO:root:Epoch[202] Batch[400] Loss[2.357] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[400] rmse=0.019171 lr=0.061273 [1,0]:INFO:root:Epoch[202] Batch[500] Loss[4.812] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[500] rmse=0.019186 lr=0.061136 [1,0]:INFO:root:Epoch[202] Batch[600] Loss[4.839] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[600] rmse=0.019181 lr=0.061000 [1,0]:INFO:root:Epoch[202] Batch[700] Loss[4.511] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[700] rmse=0.019200 lr=0.060864 [1,0]:INFO:root:Epoch[202] Batch[800] Loss[2.190] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[800] rmse=0.019200 lr=0.060728 [1,0]:INFO:root:Epoch[202] Batch[900] Loss[2.742] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[900] rmse=0.019194 lr=0.060592 [1,0]:INFO:root:Epoch[202] Batch[1000] Loss[2.703] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[1000] rmse=0.019199 lr=0.060456 [1,0]:INFO:root:Epoch[202] Batch[1100] Loss[4.907] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[1100] rmse=0.019191 lr=0.060321 [1,0]:INFO:root:Epoch[202] Batch[1200] Loss[4.531] [1,0]:INFO:root:Epoch[202] Rank[0] Batch[1200] rmse=0.019206 lr=0.060185 [1,0]:INFO:root:Epoch[202] Rank[0] Batch[1251] Time cost=400.35 Train-metric=0.019205 [1,0]:INFO:root:Epoch[202] Speed: 3199.75 samples/sec [1,0]:INFO:root:Epoch[203] Batch[100] Loss[2.534] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[100] rmse=0.019040 lr=0.059981 [1,0]:INFO:root:Epoch[203] Batch[200] Loss[2.482] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[200] rmse=0.019020 lr=0.059846 [1,0]:INFO:root:Epoch[203] Batch[300] Loss[3.949] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[300] rmse=0.019026 lr=0.059711 [1,0]:INFO:root:Epoch[203] Batch[400] Loss[2.579] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[400] rmse=0.019031 lr=0.059576 [1,0]:INFO:root:Epoch[203] Batch[500] Loss[2.678] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[500] rmse=0.019046 lr=0.059441 [1,0]:INFO:root:Epoch[203] Batch[600] Loss[2.784] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[600] rmse=0.019060 lr=0.059306 [1,0]:INFO:root:Epoch[203] Batch[700] Loss[2.569] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[700] rmse=0.019076 lr=0.059172 [1,0]:INFO:root:Epoch[203] Batch[800] Loss[3.587] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[800] rmse=0.019127 lr=0.059037 [1,0]:INFO:root:Epoch[203] Batch[900] Loss[4.768] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[900] rmse=0.019140 lr=0.058903 [1,0]:INFO:root:Epoch[203] Batch[1000] Loss[4.202] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[1000] rmse=0.019150 lr=0.058769 [1,0]:INFO:root:Epoch[203] Batch[1100] Loss[5.072] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[1100] rmse=0.019153 lr=0.058635 [1,0]:INFO:root:Epoch[203] Batch[1200] Loss[3.913] [1,0]:INFO:root:Epoch[203] Rank[0] Batch[1200] rmse=0.019166 lr=0.058501 [1,0]:INFO:root:Epoch[203] Rank[0] Batch[1251] Time cost=399.99 Train-metric=0.019160 [1,0]:INFO:root:Epoch[203] Speed: 3202.66 samples/sec [1,0]:INFO:root:Epoch[204] Batch[100] Loss[2.009] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[100] rmse=0.018992 lr=0.058299 [1,0]:INFO:root:Epoch[204] Batch[200] Loss[2.497] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[200] rmse=0.019015 lr=0.058165 [1,0]:INFO:root:Epoch[204] Batch[300] Loss[2.446] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[300] rmse=0.019066 lr=0.058032 [1,0]:INFO:root:Epoch[204] Batch[400] Loss[4.072] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[400] rmse=0.019108 lr=0.057899 [1,0]:INFO:root:Epoch[204] Batch[500] Loss[2.659] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[500] rmse=0.019104 lr=0.057765 [1,0]:INFO:root:Epoch[204] Batch[600] Loss[3.047] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[600] rmse=0.019124 lr=0.057632 [1,0]:INFO:root:Epoch[204] Batch[700] Loss[2.808] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[700] rmse=0.019137 lr=0.057499 [1,0]:INFO:root:Epoch[204] Batch[800] Loss[2.880] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[800] rmse=0.019138 lr=0.057367 [1,0]:INFO:root:Epoch[204] Batch[900] Loss[2.547] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[900] rmse=0.019152 lr=0.057234 [1,0]:INFO:root:Epoch[204] Batch[1000] Loss[3.037] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[1000] rmse=0.019151 lr=0.057101 [1,0]:INFO:root:Epoch[204] Batch[1100] Loss[2.323] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[1100] rmse=0.019161 lr=0.056969 [1,0]:INFO:root:Epoch[204] Batch[1200] Loss[2.647] [1,0]:INFO:root:Epoch[204] Rank[0] Batch[1200] rmse=0.019169 lr=0.056837 [1,0]:INFO:root:Epoch[204] Rank[0] Batch[1251] Time cost=398.85 Train-metric=0.019168 [1,0]:INFO:root:Epoch[204] Speed: 3211.82 samples/sec [1,0]:INFO:root:Epoch[204] Rank[0] Validation-accuracy=0.702940 Validation-top_k_accuracy_5=0.898860 [1,0]:INFO:root:Epoch[205] Batch[100] Loss[4.613] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[100] rmse=0.019023 lr=0.056637 [1,0]:INFO:root:Epoch[205] Batch[200] Loss[3.752] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[200] rmse=0.019057 lr=0.056505 [1,0]:INFO:root:Epoch[205] Batch[300] Loss[3.026] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[300] rmse=0.019050 lr=0.056373 [1,0]:INFO:root:Epoch[205] Batch[400] Loss[4.325] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[400] rmse=0.019037 lr=0.056242 [1,0]:INFO:root:Epoch[205] Batch[500] Loss[3.576] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[500] rmse=0.019024 lr=0.056110 [1,0]:INFO:root:Epoch[205] Batch[600] Loss[2.414] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[600] rmse=0.019029 lr=0.055979 [1,0]:INFO:root:Epoch[205] Batch[700] Loss[2.709] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[700] rmse=0.019038 lr=0.055847 [1,0]:INFO:root:Epoch[205] Batch[800] Loss[4.025] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[800] rmse=0.019066 lr=0.055716 [1,0]:INFO:root:Epoch[205] Batch[900] Loss[4.236] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[900] rmse=0.019064 lr=0.055585 [1,0]:INFO:root:Epoch[205] Batch[1000] Loss[2.516] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[1000] rmse=0.019088 lr=0.055454 [1,0]:INFO:root:Epoch[205] Batch[1100] Loss[4.850] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[1100] rmse=0.019100 lr=0.055323 [1,0]:INFO:root:Epoch[205] Batch[1200] Loss[2.387] [1,0]:INFO:root:Epoch[205] Rank[0] Batch[1200] rmse=0.019108 lr=0.055192 [1,0]:INFO:root:Epoch[205] Rank[0] Batch[1251] Time cost=398.60 Train-metric=0.019105 [1,0]:INFO:root:Epoch[205] Speed: 3213.79 samples/sec [1,0]:INFO:root:Epoch[206] Batch[100] Loss[2.559] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[100] rmse=0.019131 lr=0.054995 [1,0]:INFO:root:Epoch[206] Batch[200] Loss[3.618] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[200] rmse=0.019129 lr=0.054865 [1,0]:INFO:root:Epoch[206] Batch[300] Loss[2.651] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[300] rmse=0.019152 lr=0.054735 [1,0]:INFO:root:Epoch[206] Batch[400] Loss[4.035] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[400] rmse=0.019132 lr=0.054605 [1,0]:INFO:root:Epoch[206] Batch[500] Loss[3.025] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[500] rmse=0.019101 lr=0.054475 [1,0]:INFO:root:Epoch[206] Batch[600] Loss[2.491] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[600] rmse=0.019095 lr=0.054345 [1,0]:INFO:root:Epoch[206] Batch[700] Loss[2.475] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[700] rmse=0.019090 lr=0.054215 [1,0]:INFO:root:Epoch[206] Batch[800] Loss[2.530] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[800] rmse=0.019093 lr=0.054086 [1,0]:INFO:root:Epoch[206] Batch[900] Loss[2.591] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[900] rmse=0.019088 lr=0.053956 [1,0]:INFO:root:Epoch[206] Batch[1000] Loss[2.541] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[1000] rmse=0.019091 lr=0.053827 [1,0]:INFO:root:Epoch[206] Batch[1100] Loss[2.920] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[1100] rmse=0.019096 lr=0.053698 [1,0]:INFO:root:Epoch[206] Batch[1200] Loss[2.320] [1,0]:INFO:root:Epoch[206] Rank[0] Batch[1200] rmse=0.019108 lr=0.053568 [1,0]:INFO:root:Epoch[206] Rank[0] Batch[1251] Time cost=398.41 Train-metric=0.019105 [1,0]:INFO:root:Epoch[206] Speed: 3215.32 samples/sec [1,0]:INFO:root:Epoch[207] Batch[100] Loss[2.685] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[100] rmse=0.019015 lr=0.053374 [1,0]:INFO:root:Epoch[207] Batch[200] Loss[2.963] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[200] rmse=0.019043 lr=0.053245 [1,0]:INFO:root:Epoch[207] Batch[300] Loss[2.579] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[300] rmse=0.019063 lr=0.053117 [1,0]:INFO:root:Epoch[207] Batch[400] Loss[2.565] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[400] rmse=0.019053 lr=0.052988 [1,0]:INFO:root:Epoch[207] Batch[500] Loss[2.256] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[500] rmse=0.019055 lr=0.052860 [1,0]:INFO:root:Epoch[207] Batch[600] Loss[2.415] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[600] rmse=0.019035 lr=0.052732 [1,0]:INFO:root:Epoch[207] Batch[700] Loss[2.555] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[700] rmse=0.019043 lr=0.052603 [1,0]:INFO:root:Epoch[207] Batch[800] Loss[4.473] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[800] rmse=0.019050 lr=0.052476 [1,0]:INFO:root:Epoch[207] Batch[900] Loss[4.691] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[900] rmse=0.019053 lr=0.052348 [1,0]:INFO:root:Epoch[207] Batch[1000] Loss[2.247] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[1000] rmse=0.019051 lr=0.052220 [1,0]:INFO:root:Epoch[207] Batch[1100] Loss[2.920] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[1100] rmse=0.019061 lr=0.052093 [1,0]:INFO:root:Epoch[207] Batch[1200] Loss[2.460] [1,0]:INFO:root:Epoch[207] Rank[0] Batch[1200] rmse=0.019070 lr=0.051965 [1,0]:INFO:root:Epoch[207] Rank[0] Batch[1251] Time cost=398.95 Train-metric=0.019075 [1,0]:INFO:root:Epoch[207] Speed: 3210.97 samples/sec [1,0]:INFO:root:Epoch[208] Batch[100] Loss[2.392] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[100] rmse=0.018859 lr=0.051773 [1,0]:INFO:root:Epoch[208] Batch[200] Loss[2.609] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[200] rmse=0.018918 lr=0.051646 [1,0]:INFO:root:Epoch[208] Batch[300] Loss[2.668] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[300] rmse=0.018993 lr=0.051519 [1,0]:INFO:root:Epoch[208] Batch[400] Loss[2.062] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[400] rmse=0.019008 lr=0.051392 [1,0]:INFO:root:Epoch[208] Batch[500] Loss[2.659] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[500] rmse=0.019064 lr=0.051265 [1,0]:INFO:root:Epoch[208] Batch[600] Loss[3.645] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[600] rmse=0.019068 lr=0.051139 [1,0]:INFO:root:Epoch[208] Batch[700] Loss[2.408] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[700] rmse=0.019063 lr=0.051013 [1,0]:INFO:root:Epoch[208] Batch[800] Loss[2.772] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[800] rmse=0.019076 lr=0.050886 [1,0]:INFO:root:Epoch[208] Batch[900] Loss[4.364] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[900] rmse=0.019075 lr=0.050760 [1,0]:INFO:root:Epoch[208] Batch[1000] Loss[2.616] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[1000] rmse=0.019079 lr=0.050634 [1,0]:INFO:root:Epoch[208] Batch[1100] Loss[2.204] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[1100] rmse=0.019077 lr=0.050508 [1,0]:INFO:root:Epoch[208] Batch[1200] Loss[4.946] [1,0]:INFO:root:Epoch[208] Rank[0] Batch[1200] rmse=0.019073 lr=0.050383 [1,0]:INFO:root:Epoch[208] Rank[0] Batch[1251] Time cost=399.34 Train-metric=0.019069 [1,0]:INFO:root:Epoch[208] Speed: 3207.85 samples/sec [1,0]:INFO:root:Epoch[209] Batch[100] Loss[3.771] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[100] rmse=0.018878 lr=0.050193 [1,0]:INFO:root:Epoch[209] Batch[200] Loss[2.349] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[200] rmse=0.018892 lr=0.050067 [1,0]:INFO:root:Epoch[209] Batch[300] Loss[2.568] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[300] rmse=0.018921 lr=0.049942 [1,0]:INFO:root:Epoch[209] Batch[400] Loss[2.784] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[400] rmse=0.018904 lr=0.049817 [1,0]:INFO:root:Epoch[209] Batch[500] Loss[2.236] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[500] rmse=0.018917 lr=0.049692 [1,0]:INFO:root:Epoch[209] Batch[600] Loss[4.509] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[600] rmse=0.018939 lr=0.049567 [1,0]:INFO:root:Epoch[209] Batch[700] Loss[2.739] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[700] rmse=0.018947 lr=0.049443 [1,0]:INFO:root:Epoch[209] Batch[800] Loss[3.449] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[800] rmse=0.018971 lr=0.049318 [1,0]:INFO:root:Epoch[209] Batch[900] Loss[2.229] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[900] rmse=0.018979 lr=0.049193 [1,0]:INFO:root:Epoch[209] Batch[1000] Loss[2.527] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[1000] rmse=0.018992 lr=0.049069 [1,0]:INFO:root:Epoch[209] Batch[1100] Loss[4.521] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[1100] rmse=0.019002 lr=0.048945 [1,0]:INFO:root:Epoch[209] Batch[1200] Loss[2.436] [1,0]:INFO:root:Epoch[209] Rank[0] Batch[1200] rmse=0.019001 lr=0.048821 [1,0]:INFO:root:Epoch[209] Rank[0] Batch[1251] Time cost=399.51 Train-metric=0.019016 [1,0]:INFO:root:Epoch[209] Speed: 3206.47 samples/sec [1,0]:INFO:root:Epoch[209] Rank[0] Validation-accuracy=0.711000 Validation-top_k_accuracy_5=0.902380 [1,0]:INFO:root:Epoch[210] Batch[100] Loss[4.221] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[100] rmse=0.018878 lr=0.048634 [1,0]:INFO:root:Epoch[210] Batch[200] Loss[2.171] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[200] rmse=0.018900 lr=0.048510 [1,0]:INFO:root:Epoch[210] Batch[300] Loss[2.783] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[300] rmse=0.018893 lr=0.048387 [1,0]:INFO:root:Epoch[210] Batch[400] Loss[2.627] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[400] rmse=0.018923 lr=0.048263 [1,0]:INFO:root:Epoch[210] Batch[500] Loss[2.743] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[500] rmse=0.018950 lr=0.048140 [1,0]:INFO:root:Epoch[210] Batch[600] Loss[2.319] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[600] rmse=0.018975 lr=0.048017 [1,0]:INFO:root:Epoch[210] Batch[700] Loss[2.292] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[700] rmse=0.019000 lr=0.047894 [1,0]:INFO:root:Epoch[210] Batch[800] Loss[4.869] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[800] rmse=0.018993 lr=0.047771 [1,0]:INFO:root:Epoch[210] Batch[900] Loss[2.514] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[900] rmse=0.018995 lr=0.047648 [1,0]:INFO:root:Epoch[210] Batch[1000] Loss[4.834] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[1000] rmse=0.019011 lr=0.047525 [1,0]:INFO:root:Epoch[210] Batch[1100] Loss[2.560] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[1100] rmse=0.019011 lr=0.047403 [1,0]:INFO:root:Epoch[210] Batch[1200] Loss[2.788] [1,0]:INFO:root:Epoch[210] Rank[0] Batch[1200] rmse=0.019027 lr=0.047280 [1,0]:INFO:root:Epoch[210] Rank[0] Batch[1251] Time cost=413.22 Train-metric=0.019029 [1,0]:INFO:root:Epoch[210] Speed: 3100.08 samples/sec [1,0]:INFO:root:Epoch[211] Batch[100] Loss[2.480] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[100] rmse=0.018746 lr=0.047096 [1,0]:INFO:root:Epoch[211] Batch[200] Loss[2.574] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[200] rmse=0.018833 lr=0.046974 [1,0]:INFO:root:Epoch[211] Batch[300] Loss[2.250] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[300] rmse=0.018877 lr=0.046852 [1,0]:INFO:root:Epoch[211] Batch[400] Loss[5.086] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[400] rmse=0.018873 lr=0.046730 [1,0]:INFO:root:Epoch[211] Batch[500] Loss[3.953] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[500] rmse=0.018869 lr=0.046609 [1,0]:INFO:root:Epoch[211] Batch[600] Loss[2.175] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[600] rmse=0.018879 lr=0.046487 [1,0]:INFO:root:Epoch[211] Batch[700] Loss[5.018] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[700] rmse=0.018885 lr=0.046366 [1,0]:INFO:root:Epoch[211] Batch[800] Loss[2.366] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[800] rmse=0.018909 lr=0.046245 [1,0]:INFO:root:Epoch[211] Batch[900] Loss[4.290] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[900] rmse=0.018917 lr=0.046124 [1,0]:INFO:root:Epoch[211] Batch[1000] Loss[2.360] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[1000] rmse=0.018923 lr=0.046003 [1,0]:INFO:root:Epoch[211] Batch[1100] Loss[4.552] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[1100] rmse=0.018928 lr=0.045882 [1,0]:INFO:root:Epoch[211] Batch[1200] Loss[2.790] [1,0]:INFO:root:Epoch[211] Rank[0] Batch[1200] rmse=0.018929 lr=0.045762 [1,0]:INFO:root:Epoch[211] Rank[0] Batch[1251] Time cost=400.01 Train-metric=0.018940 [1,0]:INFO:root:Epoch[211] Speed: 3202.46 samples/sec [1,0]:INFO:root:Epoch[212] Batch[100] Loss[3.054] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[100] rmse=0.018807 lr=0.045580 [1,0]:INFO:root:Epoch[212] Batch[200] Loss[2.342] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[200] rmse=0.018833 lr=0.045459 [1,0]:INFO:root:Epoch[212] Batch[300] Loss[3.573] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[300] rmse=0.018843 lr=0.045339 [1,0]:INFO:root:Epoch[212] Batch[400] Loss[4.786] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[400] rmse=0.018900 lr=0.045219 [1,0]:INFO:root:Epoch[212] Batch[500] Loss[4.406] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[500] rmse=0.018918 lr=0.045099 [1,0]:INFO:root:Epoch[212] Batch[600] Loss[2.388] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[600] rmse=0.018940 lr=0.044980 [1,0]:INFO:root:Epoch[212] Batch[700] Loss[3.410] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[700] rmse=0.018931 lr=0.044860 [1,0]:INFO:root:Epoch[212] Batch[800] Loss[4.162] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[800] rmse=0.018938 lr=0.044741 [1,0]:INFO:root:Epoch[212] Batch[900] Loss[2.806] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[900] rmse=0.018940 lr=0.044621 [1,0]:INFO:root:Epoch[212] Batch[1000] Loss[2.431] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[1000] rmse=0.018931 lr=0.044502 [1,0]:INFO:root:Epoch[212] Batch[1100] Loss[2.635] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[1100] rmse=0.018931 lr=0.044383 [1,0]:INFO:root:Epoch[212] Batch[1200] Loss[3.911] [1,0]:INFO:root:Epoch[212] Rank[0] Batch[1200] rmse=0.018933 lr=0.044264 [1,0]:INFO:root:Epoch[212] Rank[0] Batch[1251] Time cost=399.09 Train-metric=0.018931 [1,0]:INFO:root:Epoch[212] Speed: 3209.84 samples/sec [1,0]:INFO:root:Epoch[213] Batch[100] Loss[2.937] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[100] rmse=0.018873 lr=0.044085 [1,0]:INFO:root:Epoch[213] Batch[200] Loss[5.110] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[200] rmse=0.018819 lr=0.043966 [1,0]:INFO:root:Epoch[213] Batch[300] Loss[2.479] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[300] rmse=0.018812 lr=0.043848 [1,0]:INFO:root:Epoch[213] Batch[400] Loss[3.732] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[400] rmse=0.018812 lr=0.043730 [1,0]:INFO:root:Epoch[213] Batch[500] Loss[2.553] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[500] rmse=0.018833 lr=0.043612 [1,0]:INFO:root:Epoch[213] Batch[600] Loss[3.374] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[600] rmse=0.018847 lr=0.043494 [1,0]:INFO:root:Epoch[213] Batch[700] Loss[4.163] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[700] rmse=0.018842 lr=0.043376 [1,0]:INFO:root:Epoch[213] Batch[800] Loss[2.677] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[800] rmse=0.018851 lr=0.043258 [1,0]:INFO:root:Epoch[213] Batch[900] Loss[3.478] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[900] rmse=0.018854 lr=0.043141 [1,0]:INFO:root:Epoch[213] Batch[1000] Loss[2.510] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[1000] rmse=0.018885 lr=0.043023 [1,0]:INFO:root:Epoch[213] Batch[1100] Loss[2.637] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[1100] rmse=0.018881 lr=0.042906 [1,0]:INFO:root:Epoch[213] Batch[1200] Loss[4.795] [1,0]:INFO:root:Epoch[213] Rank[0] Batch[1200] rmse=0.018886 lr=0.042789 [1,0]:INFO:root:Epoch[213] Rank[0] Batch[1251] Time cost=399.52 Train-metric=0.018890 [1,0]:INFO:root:Epoch[213] Speed: 3206.40 samples/sec [1,0]:INFO:root:Epoch[214] Batch[100] Loss[2.960] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[100] rmse=0.018789 lr=0.042612 [1,0]:INFO:root:Epoch[214] Batch[200] Loss[2.231] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[200] rmse=0.018736 lr=0.042495 [1,0]:INFO:root:Epoch[214] Batch[300] Loss[4.058] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[300] rmse=0.018857 lr=0.042379 [1,0]:INFO:root:Epoch[214] Batch[400] Loss[2.409] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[400] rmse=0.018847 lr=0.042262 [1,0]:INFO:root:Epoch[214] Batch[500] Loss[2.238] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[500] rmse=0.018806 lr=0.042146 [1,0]:INFO:root:Epoch[214] Batch[600] Loss[2.547] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[600] rmse=0.018824 lr=0.042030 [1,0]:INFO:root:Epoch[214] Batch[700] Loss[2.415] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[700] rmse=0.018812 lr=0.041914 [1,0]:INFO:root:Epoch[214] Batch[800] Loss[2.924] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[800] rmse=0.018815 lr=0.041798 [1,0]:INFO:root:Epoch[214] Batch[900] Loss[2.308] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[900] rmse=0.018806 lr=0.041682 [1,0]:INFO:root:Epoch[214] Batch[1000] Loss[2.437] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[1000] rmse=0.018813 lr=0.041566 [1,0]:INFO:root:Epoch[214] Batch[1100] Loss[3.914] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[1100] rmse=0.018820 lr=0.041451 [1,0]:INFO:root:Epoch[214] Batch[1200] Loss[2.589] [1,0]:INFO:root:Epoch[214] Rank[0] Batch[1200] rmse=0.018816 lr=0.041335 [1,0]:INFO:root:Epoch[214] Rank[0] Batch[1251] Time cost=399.01 Train-metric=0.018829 [1,0]:INFO:root:Epoch[214] Speed: 3210.52 samples/sec [1,0]:INFO:root:Epoch[214] Rank[0] Validation-accuracy=0.714180 Validation-top_k_accuracy_5=0.902960 [1,0]:INFO:root:Epoch[215] Batch[100] Loss[2.410] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[100] rmse=0.018812 lr=0.041161 [1,0]:INFO:root:Epoch[215] Batch[200] Loss[3.049] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[200] rmse=0.018798 lr=0.041046 [1,0]:INFO:root:Epoch[215] Batch[300] Loss[2.542] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[300] rmse=0.018792 lr=0.040931 [1,0]:INFO:root:Epoch[215] Batch[400] Loss[4.849] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[400] rmse=0.018767 lr=0.040817 [1,0]:INFO:root:Epoch[215] Batch[500] Loss[2.347] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[500] rmse=0.018790 lr=0.040702 [1,0]:INFO:root:Epoch[215] Batch[600] Loss[2.391] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[600] rmse=0.018795 lr=0.040588 [1,0]:INFO:root:Epoch[215] Batch[700] Loss[2.351] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[700] rmse=0.018811 lr=0.040473 [1,0]:INFO:root:Epoch[215] Batch[800] Loss[2.467] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[800] rmse=0.018808 lr=0.040359 [1,0]:INFO:root:Epoch[215] Batch[900] Loss[2.492] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[900] rmse=0.018805 lr=0.040245 [1,0]:INFO:root:Epoch[215] Batch[1000] Loss[2.965] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[1000] rmse=0.018814 lr=0.040131 [1,0]:INFO:root:Epoch[215] Batch[1100] Loss[3.447] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[1100] rmse=0.018812 lr=0.040018 [1,0]:INFO:root:Epoch[215] Batch[1200] Loss[4.862] [1,0]:INFO:root:Epoch[215] Rank[0] Batch[1200] rmse=0.018819 lr=0.039904 [1,0]:INFO:root:Epoch[215] Rank[0] Batch[1251] Time cost=398.32 Train-metric=0.018827 [1,0]:INFO:root:Epoch[215] Speed: 3216.06 samples/sec [1,0]:INFO:root:Epoch[216] Batch[100] Loss[4.603] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[100] rmse=0.018644 lr=0.039733 [1,0]:INFO:root:Epoch[216] Batch[200] Loss[2.370] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[200] rmse=0.018657 lr=0.039620 [1,0]:INFO:root:Epoch[216] Batch[300] Loss[2.264] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[300] rmse=0.018677 lr=0.039507 [1,0]:INFO:root:Epoch[216] Batch[400] Loss[2.876] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[400] rmse=0.018743 lr=0.039394 [1,0]:INFO:root:Epoch[216] Batch[500] Loss[4.250] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[500] rmse=0.018758 lr=0.039281 [1,0]:INFO:root:Epoch[216] Batch[600] Loss[2.320] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[600] rmse=0.018773 lr=0.039168 [1,0]:INFO:root:Epoch[216] Batch[700] Loss[3.586] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[700] rmse=0.018762 lr=0.039056 [1,0]:INFO:root:Epoch[216] Batch[800] Loss[2.403] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[800] rmse=0.018765 lr=0.038943 [1,0]:INFO:root:Epoch[216] Batch[900] Loss[3.370] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[900] rmse=0.018786 lr=0.038831 [1,0]:INFO:root:Epoch[216] Batch[1000] Loss[2.448] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[1000] rmse=0.018781 lr=0.038719 [1,0]:INFO:root:Epoch[216] Batch[1100] Loss[2.539] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[1100] rmse=0.018778 lr=0.038607 [1,0]:INFO:root:Epoch[216] Batch[1200] Loss[2.407] [1,0]:INFO:root:Epoch[216] Rank[0] Batch[1200] rmse=0.018784 lr=0.038495 [1,0]:INFO:root:Epoch[216] Rank[0] Batch[1251] Time cost=398.81 Train-metric=0.018779 [1,0]:INFO:root:Epoch[216] Speed: 3212.14 samples/sec [1,0]:INFO:root:Epoch[217] Batch[100] Loss[2.352] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[100] rmse=0.018843 lr=0.038327 [1,0]:INFO:root:Epoch[217] Batch[200] Loss[4.636] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[200] rmse=0.018767 lr=0.038216 [1,0]:INFO:root:Epoch[217] Batch[300] Loss[2.307] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[300] rmse=0.018808 lr=0.038104 [1,0]:INFO:root:Epoch[217] Batch[400] Loss[2.378] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[400] rmse=0.018801 lr=0.037993 [1,0]:INFO:root:Epoch[217] Batch[500] Loss[2.729] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[500] rmse=0.018773 lr=0.037882 [1,0]:INFO:root:Epoch[217] Batch[600] Loss[2.686] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[600] rmse=0.018755 lr=0.037771 [1,0]:INFO:root:Epoch[217] Batch[700] Loss[2.550] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[700] rmse=0.018756 lr=0.037661 [1,0]:INFO:root:Epoch[217] Batch[800] Loss[2.190] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[800] rmse=0.018763 lr=0.037550 [1,0]:INFO:root:Epoch[217] Batch[900] Loss[2.743] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[900] rmse=0.018770 lr=0.037440 [1,0]:INFO:root:Epoch[217] Batch[1000] Loss[2.368] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[1000] rmse=0.018761 lr=0.037329 [1,0]:INFO:root:Epoch[217] Batch[1100] Loss[5.036] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[1100] rmse=0.018763 lr=0.037219 [1,0]:INFO:root:Epoch[217] Batch[1200] Loss[2.303] [1,0]:INFO:root:Epoch[217] Rank[0] Batch[1200] rmse=0.018778 lr=0.037109 [1,0]:INFO:root:Epoch[217] Rank[0] Batch[1251] Time cost=399.67 Train-metric=0.018780 [1,0]:INFO:root:Epoch[217] Speed: 3205.24 samples/sec [1,0]:INFO:root:Epoch[218] Batch[100] Loss[2.316] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[100] rmse=0.018654 lr=0.036944 [1,0]:INFO:root:Epoch[218] Batch[200] Loss[3.415] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[200] rmse=0.018691 lr=0.036834 [1,0]:INFO:root:Epoch[218] Batch[300] Loss[3.525] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[300] rmse=0.018691 lr=0.036725 [1,0]:INFO:root:Epoch[218] Batch[400] Loss[2.207] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[400] rmse=0.018705 lr=0.036615 [1,0]:INFO:root:Epoch[218] Batch[500] Loss[2.664] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[500] rmse=0.018717 lr=0.036506 [1,0]:INFO:root:Epoch[218] Batch[600] Loss[2.950] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[600] rmse=0.018721 lr=0.036397 [1,0]:INFO:root:Epoch[218] Batch[700] Loss[2.517] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[700] rmse=0.018725 lr=0.036288 [1,0]:INFO:root:Epoch[218] Batch[800] Loss[2.427] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[800] rmse=0.018736 lr=0.036180 [1,0]:INFO:root:Epoch[218] Batch[900] Loss[2.469] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[900] rmse=0.018745 lr=0.036071 [1,0]:INFO:root:Epoch[218] Batch[1000] Loss[2.339] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[1000] rmse=0.018754 lr=0.035963 [1,0]:INFO:root:Epoch[218] Batch[1100] Loss[2.251] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[1100] rmse=0.018763 lr=0.035854 [1,0]:INFO:root:Epoch[218] Batch[1200] Loss[2.343] [1,0]:INFO:root:Epoch[218] Rank[0] Batch[1200] rmse=0.018769 lr=0.035746 [1,0]:INFO:root:Epoch[218] Rank[0] Batch[1251] Time cost=398.72 Train-metric=0.018765 [1,0]:INFO:root:Epoch[218] Speed: 3212.81 samples/sec [1,0]:INFO:root:Epoch[219] Batch[100] Loss[4.922] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[100] rmse=0.018665 lr=0.035583 [1,0]:INFO:root:Epoch[219] Batch[200] Loss[2.223] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[200] rmse=0.018693 lr=0.035476 [1,0]:INFO:root:Epoch[219] Batch[300] Loss[3.424] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[300] rmse=0.018679 lr=0.035368 [1,0]:INFO:root:Epoch[219] Batch[400] Loss[2.113] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[400] rmse=0.018698 lr=0.035260 [1,0]:INFO:root:Epoch[219] Batch[500] Loss[2.137] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[500] rmse=0.018669 lr=0.035153 [1,0]:INFO:root:Epoch[219] Batch[600] Loss[3.436] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[600] rmse=0.018692 lr=0.035046 [1,0]:INFO:root:Epoch[219] Batch[700] Loss[2.354] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[700] rmse=0.018685 lr=0.034939 [1,0]:INFO:root:Epoch[219] Batch[800] Loss[2.111] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[800] rmse=0.018675 lr=0.034832 [1,0]:INFO:root:Epoch[219] Batch[900] Loss[4.927] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[900] rmse=0.018685 lr=0.034725 [1,0]:INFO:root:Epoch[219] Batch[1000] Loss[2.469] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[1000] rmse=0.018684 lr=0.034619 [1,0]:INFO:root:Epoch[219] Batch[1100] Loss[4.399] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[1100] rmse=0.018701 lr=0.034512 [1,0]:INFO:root:Epoch[219] Batch[1200] Loss[2.396] [1,0]:INFO:root:Epoch[219] Rank[0] Batch[1200] rmse=0.018701 lr=0.034406 [1,0]:INFO:root:Epoch[219] Rank[0] Batch[1251] Time cost=399.47 Train-metric=0.018701 [1,0]:INFO:root:Epoch[219] Speed: 3206.82 samples/sec [1,0]:INFO:root:Epoch[219] Rank[0] Validation-accuracy=0.719280 Validation-top_k_accuracy_5=0.905940 [1,0]:INFO:root:Epoch[220] Batch[100] Loss[2.534] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[100] rmse=0.018389 lr=0.034246 [1,0]:INFO:root:Epoch[220] Batch[200] Loss[4.317] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[200] rmse=0.018540 lr=0.034140 [1,0]:INFO:root:Epoch[220] Batch[300] Loss[2.282] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[300] rmse=0.018579 lr=0.034034 [1,0]:INFO:root:Epoch[220] Batch[400] Loss[2.062] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[400] rmse=0.018575 lr=0.033929 [1,0]:INFO:root:Epoch[220] Batch[500] Loss[4.439] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[500] rmse=0.018587 lr=0.033823 [1,0]:INFO:root:Epoch[220] Batch[600] Loss[3.135] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[600] rmse=0.018601 lr=0.033718 [1,0]:INFO:root:Epoch[220] Batch[700] Loss[4.665] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[700] rmse=0.018613 lr=0.033613 [1,0]:INFO:root:Epoch[220] Batch[800] Loss[3.517] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[800] rmse=0.018634 lr=0.033508 [1,0]:INFO:root:Epoch[220] Batch[900] Loss[2.455] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[900] rmse=0.018646 lr=0.033403 [1,0]:INFO:root:Epoch[220] Batch[1000] Loss[4.268] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[1000] rmse=0.018650 lr=0.033298 [1,0]:INFO:root:Epoch[220] Batch[1100] Loss[2.637] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[1100] rmse=0.018654 lr=0.033194 [1,0]:INFO:root:Epoch[220] Batch[1200] Loss[2.337] [1,0]:INFO:root:Epoch[220] Rank[0] Batch[1200] rmse=0.018662 lr=0.033089 [1,0]:INFO:root:Epoch[220] Rank[0] Batch[1251] Time cost=397.31 Train-metric=0.018666 [1,0]:INFO:root:Epoch[220] Speed: 3224.26 samples/sec [1,0]:INFO:root:Epoch[221] Batch[100] Loss[3.883] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[100] rmse=0.018498 lr=0.032932 [1,0]:INFO:root:Epoch[221] Batch[200] Loss[2.502] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[200] rmse=0.018516 lr=0.032828 [1,0]:INFO:root:Epoch[221] Batch[300] Loss[3.286] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[300] rmse=0.018503 lr=0.032724 [1,0]:INFO:root:Epoch[221] Batch[400] Loss[2.308] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[400] rmse=0.018569 lr=0.032620 [1,0]:INFO:root:Epoch[221] Batch[500] Loss[2.304] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[500] rmse=0.018589 lr=0.032517 [1,0]:INFO:root:Epoch[221] Batch[600] Loss[3.123] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[600] rmse=0.018584 lr=0.032413 [1,0]:INFO:root:Epoch[221] Batch[700] Loss[3.001] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[700] rmse=0.018577 lr=0.032310 [1,0]:INFO:root:Epoch[221] Batch[800] Loss[4.645] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[800] rmse=0.018578 lr=0.032207 [1,0]:INFO:root:Epoch[221] Batch[900] Loss[2.455] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[900] rmse=0.018584 lr=0.032104 [1,0]:INFO:root:Epoch[221] Batch[1000] Loss[2.459] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[1000] rmse=0.018595 lr=0.032001 [1,0]:INFO:root:Epoch[221] Batch[1100] Loss[2.720] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[1100] rmse=0.018602 lr=0.031898 [1,0]:INFO:root:Epoch[221] Batch[1200] Loss[2.302] [1,0]:INFO:root:Epoch[221] Rank[0] Batch[1200] rmse=0.018618 lr=0.031796 [1,0]:INFO:root:Epoch[221] Rank[0] Batch[1251] Time cost=399.24 Train-metric=0.018615 [1,0]:INFO:root:Epoch[221] Speed: 3208.62 samples/sec [1,0]:INFO:root:Epoch[222] Batch[100] Loss[2.204] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[100] rmse=0.018551 lr=0.031641 [1,0]:INFO:root:Epoch[222] Batch[200] Loss[3.333] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[200] rmse=0.018520 lr=0.031539 [1,0]:INFO:root:Epoch[222] Batch[300] Loss[3.352] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[300] rmse=0.018539 lr=0.031437 [1,0]:INFO:root:Epoch[222] Batch[400] Loss[2.037] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[400] rmse=0.018539 lr=0.031335 [1,0]:INFO:root:Epoch[222] Batch[500] Loss[4.405] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[500] rmse=0.018536 lr=0.031234 [1,0]:INFO:root:Epoch[222] Batch[600] Loss[4.667] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[600] rmse=0.018541 lr=0.031132 [1,0]:INFO:root:Epoch[222] Batch[700] Loss[2.595] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[700] rmse=0.018519 lr=0.031031 [1,0]:INFO:root:Epoch[222] Batch[800] Loss[4.149] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[800] rmse=0.018552 lr=0.030929 [1,0]:INFO:root:Epoch[222] Batch[900] Loss[2.503] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[900] rmse=0.018558 lr=0.030828 [1,0]:INFO:root:Epoch[222] Batch[1000] Loss[2.612] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[1000] rmse=0.018580 lr=0.030727 [1,0]:INFO:root:Epoch[222] Batch[1100] Loss[2.208] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[1100] rmse=0.018584 lr=0.030627 [1,0]:INFO:root:Epoch[222] Batch[1200] Loss[4.722] [1,0]:INFO:root:Epoch[222] Rank[0] Batch[1200] rmse=0.018592 lr=0.030526 [1,0]:INFO:root:Epoch[222] Rank[0] Batch[1251] Time cost=399.50 Train-metric=0.018590 [1,0]:INFO:root:Epoch[222] Speed: 3206.55 samples/sec [1,0]:INFO:root:Epoch[223] Batch[100] Loss[2.617] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[100] rmse=0.018343 lr=0.030374 [1,0]:INFO:root:Epoch[223] Batch[200] Loss[2.803] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[200] rmse=0.018509 lr=0.030274 [1,0]:INFO:root:Epoch[223] Batch[300] Loss[2.575] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[300] rmse=0.018451 lr=0.030174 [1,0]:INFO:root:Epoch[223] Batch[400] Loss[2.276] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[400] rmse=0.018456 lr=0.030074 [1,0]:INFO:root:Epoch[223] Batch[500] Loss[4.923] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[500] rmse=0.018464 lr=0.029974 [1,0]:INFO:root:Epoch[223] Batch[600] Loss[2.417] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[600] rmse=0.018483 lr=0.029874 [1,0]:INFO:root:Epoch[223] Batch[700] Loss[2.334] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[700] rmse=0.018506 lr=0.029775 [1,0]:INFO:root:Epoch[223] Batch[800] Loss[4.293] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[800] rmse=0.018500 lr=0.029676 [1,0]:INFO:root:Epoch[223] Batch[900] Loss[2.605] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[900] rmse=0.018506 lr=0.029576 [1,0]:INFO:root:Epoch[223] Batch[1000] Loss[3.259] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[1000] rmse=0.018527 lr=0.029477 [1,0]:INFO:root:Epoch[223] Batch[1100] Loss[2.440] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[1100] rmse=0.018537 lr=0.029379 [1,0]:INFO:root:Epoch[223] Batch[1200] Loss[2.357] [1,0]:INFO:root:Epoch[223] Rank[0] Batch[1200] rmse=0.018551 lr=0.029280 [1,0]:INFO:root:Epoch[223] Rank[0] Batch[1251] Time cost=398.98 Train-metric=0.018554 [1,0]:INFO:root:Epoch[223] Speed: 3210.73 samples/sec [1,0]:INFO:root:Epoch[224] Batch[100] Loss[3.846] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[100] rmse=0.018342 lr=0.029131 [1,0]:INFO:root:Epoch[224] Batch[200] Loss[2.554] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[200] rmse=0.018415 lr=0.029033 [1,0]:INFO:root:Epoch[224] Batch[300] Loss[2.065] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[300] rmse=0.018441 lr=0.028934 [1,0]:INFO:root:Epoch[224] Batch[400] Loss[2.477] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[400] rmse=0.018469 lr=0.028836 [1,0]:INFO:root:Epoch[224] Batch[500] Loss[2.317] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[500] rmse=0.018459 lr=0.028739 [1,0]:INFO:root:Epoch[224] Batch[600] Loss[4.635] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[600] rmse=0.018453 lr=0.028641 [1,0]:INFO:root:Epoch[224] Batch[700] Loss[2.487] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[700] rmse=0.018439 lr=0.028543 [1,0]:INFO:root:Epoch[224] Batch[800] Loss[3.706] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[800] rmse=0.018455 lr=0.028446 [1,0]:INFO:root:Epoch[224] Batch[900] Loss[3.827] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[900] rmse=0.018473 lr=0.028349 [1,0]:INFO:root:Epoch[224] Batch[1000] Loss[2.238] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[1000] rmse=0.018482 lr=0.028251 [1,0]:INFO:root:Epoch[224] Batch[1100] Loss[2.342] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[1100] rmse=0.018493 lr=0.028154 [1,0]:INFO:root:Epoch[224] Batch[1200] Loss[2.278] [1,0]:INFO:root:Epoch[224] Rank[0] Batch[1200] rmse=0.018505 lr=0.028058 [1,0]:INFO:root:Epoch[224] Rank[0] Batch[1251] Time cost=399.92 Train-metric=0.018514 [1,0]:INFO:root:Epoch[224] Speed: 3203.19 samples/sec [1,0]:INFO:root:Epoch[224] Rank[0] Validation-accuracy=0.727260 Validation-top_k_accuracy_5=0.910560 [1,0]:INFO:root:Epoch[225] Batch[100] Loss[2.467] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[100] rmse=0.018521 lr=0.027912 [1,0]:INFO:root:Epoch[225] Batch[200] Loss[4.256] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[200] rmse=0.018534 lr=0.027815 [1,0]:INFO:root:Epoch[225] Batch[300] Loss[2.552] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[300] rmse=0.018523 lr=0.027719 [1,0]:INFO:root:Epoch[225] Batch[400] Loss[3.071] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[400] rmse=0.018502 lr=0.027623 [1,0]:INFO:root:Epoch[225] Batch[500] Loss[2.331] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[500] rmse=0.018504 lr=0.027527 [1,0]:INFO:root:Epoch[225] Batch[600] Loss[2.661] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[600] rmse=0.018538 lr=0.027431 [1,0]:INFO:root:Epoch[225] Batch[700] Loss[2.923] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[700] rmse=0.018547 lr=0.027336 [1,0]:INFO:root:Epoch[225] Batch[800] Loss[3.497] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[800] rmse=0.018548 lr=0.027240 [1,0]:INFO:root:Epoch[225] Batch[900] Loss[2.330] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[900] rmse=0.018536 lr=0.027145 [1,0]:INFO:root:Epoch[225] Batch[1000] Loss[2.331] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[1000] rmse=0.018537 lr=0.027050 [1,0]:INFO:root:Epoch[225] Batch[1100] Loss[4.681] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[1100] rmse=0.018532 lr=0.026955 [1,0]:INFO:root:Epoch[225] Batch[1200] Loss[2.183] [1,0]:INFO:root:Epoch[225] Rank[0] Batch[1200] rmse=0.018540 lr=0.026860 [1,0]:INFO:root:Epoch[225] Rank[0] Batch[1251] Time cost=403.08 Train-metric=0.018539 [1,0]:INFO:root:Epoch[225] Speed: 3178.06 samples/sec [1,0]:INFO:root:Epoch[226] Batch[100] Loss[4.573] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[100] rmse=0.018389 lr=0.026717 [1,0]:INFO:root:Epoch[226] Batch[200] Loss[2.419] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[200] rmse=0.018420 lr=0.026622 [1,0]:INFO:root:Epoch[226] Batch[300] Loss[2.253] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[300] rmse=0.018406 lr=0.026528 [1,0]:INFO:root:Epoch[226] Batch[400] Loss[2.580] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[400] rmse=0.018403 lr=0.026434 [1,0]:INFO:root:Epoch[226] Batch[500] Loss[3.933] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[500] rmse=0.018439 lr=0.026340 [1,0]:INFO:root:Epoch[226] Batch[600] Loss[4.948] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[600] rmse=0.018454 lr=0.026246 [1,0]:INFO:root:Epoch[226] Batch[700] Loss[2.245] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[700] rmse=0.018464 lr=0.026152 [1,0]:INFO:root:Epoch[226] Batch[800] Loss[4.338] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[800] rmse=0.018477 lr=0.026059 [1,0]:INFO:root:Epoch[226] Batch[900] Loss[2.425] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[900] rmse=0.018471 lr=0.025965 [1,0]:INFO:root:Epoch[226] Batch[1000] Loss[2.371] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[1000] rmse=0.018464 lr=0.025872 [1,0]:INFO:root:Epoch[226] Batch[1100] Loss[4.439] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[1100] rmse=0.018472 lr=0.025779 [1,0]:INFO:root:Epoch[226] Batch[1200] Loss[2.609] [1,0]:INFO:root:Epoch[226] Rank[0] Batch[1200] rmse=0.018468 lr=0.025686 [1,0]:INFO:root:Epoch[226] Rank[0] Batch[1251] Time cost=402.85 Train-metric=0.018475 [1,0]:INFO:root:Epoch[226] Speed: 3179.88 samples/sec [1,0]:INFO:root:Epoch[227] Batch[100] Loss[2.304] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[100] rmse=0.018276 lr=0.025546 [1,0]:INFO:root:Epoch[227] Batch[200] Loss[2.310] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[200] rmse=0.018415 lr=0.025453 [1,0]:INFO:root:Epoch[227] Batch[300] Loss[2.264] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[300] rmse=0.018370 lr=0.025361 [1,0]:INFO:root:Epoch[227] Batch[400] Loss[2.144] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[400] rmse=0.018370 lr=0.025269 [1,0]:INFO:root:Epoch[227] Batch[500] Loss[2.189] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[500] rmse=0.018370 lr=0.025177 [1,0]:INFO:root:Epoch[227] Batch[600] Loss[4.051] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[600] rmse=0.018421 lr=0.025085 [1,0]:INFO:root:Epoch[227] Batch[700] Loss[4.592] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[700] rmse=0.018436 lr=0.024993 [1,0]:INFO:root:Epoch[227] Batch[800] Loss[2.027] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[800] rmse=0.018445 lr=0.024901 [1,0]:INFO:root:Epoch[227] Batch[900] Loss[2.229] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[900] rmse=0.018439 lr=0.024810 [1,0]:INFO:root:Epoch[227] Batch[1000] Loss[4.676] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[1000] rmse=0.018443 lr=0.024719 [1,0]:INFO:root:Epoch[227] Batch[1100] Loss[2.543] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[1100] rmse=0.018456 lr=0.024628 [1,0]:INFO:root:Epoch[227] Batch[1200] Loss[2.212] [1,0]:INFO:root:Epoch[227] Rank[0] Batch[1200] rmse=0.018470 lr=0.024537 [1,0]:INFO:root:Epoch[227] Rank[0] Batch[1251] Time cost=404.24 Train-metric=0.018459 [1,0]:INFO:root:Epoch[227] Speed: 3169.00 samples/sec [1,0]:INFO:root:Epoch[228] Batch[100] Loss[2.242] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[100] rmse=0.018395 lr=0.024400 [1,0]:INFO:root:Epoch[228] Batch[200] Loss[2.337] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[200] rmse=0.018345 lr=0.024309 [1,0]:INFO:root:Epoch[228] Batch[300] Loss[2.328] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[300] rmse=0.018365 lr=0.024219 [1,0]:INFO:root:Epoch[228] Batch[400] Loss[2.125] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[400] rmse=0.018391 lr=0.024128 [1,0]:INFO:root:Epoch[228] Batch[500] Loss[2.572] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[500] rmse=0.018401 lr=0.024038 [1,0]:INFO:root:Epoch[228] Batch[600] Loss[3.847] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[600] rmse=0.018392 lr=0.023948 [1,0]:INFO:root:Epoch[228] Batch[700] Loss[2.307] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[700] rmse=0.018397 lr=0.023859 [1,0]:INFO:root:Epoch[228] Batch[800] Loss[2.427] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[800] rmse=0.018411 lr=0.023769 [1,0]:INFO:root:Epoch[228] Batch[900] Loss[2.795] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[900] rmse=0.018414 lr=0.023680 [1,0]:INFO:root:Epoch[228] Batch[1000] Loss[4.395] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[1000] rmse=0.018433 lr=0.023590 [1,0]:INFO:root:Epoch[228] Batch[1100] Loss[2.368] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[1100] rmse=0.018426 lr=0.023501 [1,0]:INFO:root:Epoch[228] Batch[1200] Loss[2.377] [1,0]:INFO:root:Epoch[228] Rank[0] Batch[1200] rmse=0.018434 lr=0.023412 [1,0]:INFO:root:Epoch[228] Rank[0] Batch[1251] Time cost=401.70 Train-metric=0.018435 [1,0]:INFO:root:Epoch[228] Speed: 3189.03 samples/sec [1,0]:INFO:root:Epoch[229] Batch[100] Loss[4.314] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[100] rmse=0.018317 lr=0.023278 [1,0]:INFO:root:Epoch[229] Batch[200] Loss[4.020] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[200] rmse=0.018333 lr=0.023189 [1,0]:INFO:root:Epoch[229] Batch[300] Loss[2.749] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[300] rmse=0.018316 lr=0.023101 [1,0]:INFO:root:Epoch[229] Batch[400] Loss[2.383] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[400] rmse=0.018332 lr=0.023013 [1,0]:INFO:root:Epoch[229] Batch[500] Loss[2.034] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[500] rmse=0.018324 lr=0.022925 [1,0]:INFO:root:Epoch[229] Batch[600] Loss[2.349] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[600] rmse=0.018306 lr=0.022837 [1,0]:INFO:root:Epoch[229] Batch[700] Loss[2.553] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[700] rmse=0.018327 lr=0.022749 [1,0]:INFO:root:Epoch[229] Batch[800] Loss[3.021] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[800] rmse=0.018345 lr=0.022661 [1,0]:INFO:root:Epoch[229] Batch[900] Loss[4.713] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[900] rmse=0.018338 lr=0.022574 [1,0]:INFO:root:Epoch[229] Batch[1000] Loss[2.282] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[1000] rmse=0.018348 lr=0.022486 [1,0]:INFO:root:Epoch[229] Batch[1100] Loss[3.603] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[1100] rmse=0.018363 lr=0.022399 [1,0]:INFO:root:Epoch[229] Batch[1200] Loss[2.377] [1,0]:INFO:root:Epoch[229] Rank[0] Batch[1200] rmse=0.018362 lr=0.022312 [1,0]:INFO:root:Epoch[229] Rank[0] Batch[1251] Time cost=399.79 Train-metric=0.018359 [1,0]:INFO:root:Epoch[229] Speed: 3204.24 samples/sec [1,0]:INFO:root:Epoch[229] Rank[0] Validation-accuracy=0.732360 Validation-top_k_accuracy_5=0.910980 [1,0]:INFO:root:Epoch[230] Batch[100] Loss[2.357] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[100] rmse=0.018225 lr=0.022181 [1,0]:INFO:root:Epoch[230] Batch[200] Loss[2.338] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[200] rmse=0.018280 lr=0.022095 [1,0]:INFO:root:Epoch[230] Batch[300] Loss[2.516] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[300] rmse=0.018285 lr=0.022008 [1,0]:INFO:root:Epoch[230] Batch[400] Loss[2.174] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[400] rmse=0.018304 lr=0.021922 [1,0]:INFO:root:Epoch[230] Batch[500] Loss[2.237] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[500] rmse=0.018330 lr=0.021836 [1,0]:INFO:root:Epoch[230] Batch[600] Loss[2.116] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[600] rmse=0.018307 lr=0.021750 [1,0]:INFO:root:Epoch[230] Batch[700] Loss[3.200] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[700] rmse=0.018309 lr=0.021664 [1,0]:INFO:root:Epoch[230] Batch[800] Loss[4.145] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[800] rmse=0.018298 lr=0.021578 [1,0]:INFO:root:Epoch[230] Batch[900] Loss[4.928] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[900] rmse=0.018304 lr=0.021493 [1,0]:INFO:root:Epoch[230] Batch[1000] Loss[2.424] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[1000] rmse=0.018309 lr=0.021407 [1,0]:INFO:root:Epoch[230] Batch[1100] Loss[2.494] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[1100] rmse=0.018323 lr=0.021322 [1,0]:INFO:root:Epoch[230] Batch[1200] Loss[2.390] [1,0]:INFO:root:Epoch[230] Rank[0] Batch[1200] rmse=0.018337 lr=0.021237 [1,0]:INFO:root:Epoch[230] Rank[0] Batch[1251] Time cost=399.09 Train-metric=0.018334 [1,0]:INFO:root:Epoch[230] Speed: 3209.90 samples/sec [1,0]:INFO:root:Epoch[231] Batch[100] Loss[2.309] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[100] rmse=0.018237 lr=0.021109 [1,0]:INFO:root:Epoch[231] Batch[200] Loss[2.860] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[200] rmse=0.018275 lr=0.021025 [1,0]:INFO:root:Epoch[231] Batch[300] Loss[2.761] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[300] rmse=0.018242 lr=0.020940 [1,0]:INFO:root:Epoch[231] Batch[400] Loss[2.371] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[400] rmse=0.018253 lr=0.020856 [1,0]:INFO:root:Epoch[231] Batch[500] Loss[3.828] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[500] rmse=0.018246 lr=0.020772 [1,0]:INFO:root:Epoch[231] Batch[600] Loss[2.403] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[600] rmse=0.018259 lr=0.020688 [1,0]:INFO:root:Epoch[231] Batch[700] Loss[4.090] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[700] rmse=0.018283 lr=0.020604 [1,0]:INFO:root:Epoch[231] Batch[800] Loss[4.892] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[800] rmse=0.018285 lr=0.020520 [1,0]:INFO:root:Epoch[231] Batch[900] Loss[4.623] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[900] rmse=0.018289 lr=0.020437 [1,0]:INFO:root:Epoch[231] Batch[1000] Loss[2.218] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[1000] rmse=0.018299 lr=0.020354 [1,0]:INFO:root:Epoch[231] Batch[1100] Loss[2.169] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[1100] rmse=0.018295 lr=0.020270 [1,0]:INFO:root:Epoch[231] Batch[1200] Loss[2.286] [1,0]:INFO:root:Epoch[231] Rank[0] Batch[1200] rmse=0.018296 lr=0.020187 [1,0]:INFO:root:Epoch[231] Rank[0] Batch[1251] Time cost=399.34 Train-metric=0.018305 [1,0]:INFO:root:Epoch[231] Speed: 3207.84 samples/sec [1,0]:INFO:root:Epoch[232] Batch[100] Loss[2.446] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[100] rmse=0.018154 lr=0.020062 [1,0]:INFO:root:Epoch[232] Batch[200] Loss[2.968] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[200] rmse=0.018203 lr=0.019980 [1,0]:INFO:root:Epoch[232] Batch[300] Loss[2.861] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[300] rmse=0.018201 lr=0.019897 [1,0]:INFO:root:Epoch[232] Batch[400] Loss[2.220] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[400] rmse=0.018222 lr=0.019815 [1,0]:INFO:root:Epoch[232] Batch[500] Loss[3.471] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[500] rmse=0.018249 lr=0.019733 [1,0]:INFO:root:Epoch[232] Batch[600] Loss[2.147] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[600] rmse=0.018263 lr=0.019651 [1,0]:INFO:root:Epoch[232] Batch[700] Loss[3.073] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[700] rmse=0.018269 lr=0.019569 [1,0]:INFO:root:Epoch[232] Batch[800] Loss[2.364] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[800] rmse=0.018280 lr=0.019488 [1,0]:INFO:root:Epoch[232] Batch[900] Loss[2.583] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[900] rmse=0.018273 lr=0.019406 [1,0]:INFO:root:Epoch[232] Batch[1000] Loss[4.621] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[1000] rmse=0.018275 lr=0.019325 [1,0]:INFO:root:Epoch[232] Batch[1100] Loss[2.644] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[1100] rmse=0.018277 lr=0.019244 [1,0]:INFO:root:Epoch[232] Batch[1200] Loss[2.341] [1,0]:INFO:root:Epoch[232] Rank[0] Batch[1200] rmse=0.018271 lr=0.019163 [1,0]:INFO:root:Epoch[232] Rank[0] Batch[1251] Time cost=399.78 Train-metric=0.018269 [1,0]:INFO:root:Epoch[232] Speed: 3204.33 samples/sec [1,0]:INFO:root:Epoch[233] Batch[100] Loss[2.814] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[100] rmse=0.018117 lr=0.019041 [1,0]:INFO:root:Epoch[233] Batch[200] Loss[2.364] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[200] rmse=0.018136 lr=0.018960 [1,0]:INFO:root:Epoch[233] Batch[300] Loss[2.449] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[300] rmse=0.018157 lr=0.018880 [1,0]:INFO:root:Epoch[233] Batch[400] Loss[2.231] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[400] rmse=0.018189 lr=0.018800 [1,0]:INFO:root:Epoch[233] Batch[500] Loss[2.304] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[500] rmse=0.018205 lr=0.018720 [1,0]:INFO:root:Epoch[233] Batch[600] Loss[3.311] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[600] rmse=0.018195 lr=0.018640 [1,0]:INFO:root:Epoch[233] Batch[700] Loss[2.046] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[700] rmse=0.018184 lr=0.018560 [1,0]:INFO:root:Epoch[233] Batch[800] Loss[2.617] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[800] rmse=0.018187 lr=0.018480 [1,0]:INFO:root:Epoch[233] Batch[900] Loss[2.027] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[900] rmse=0.018189 lr=0.018401 [1,0]:INFO:root:Epoch[233] Batch[1000] Loss[2.361] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[1000] rmse=0.018185 lr=0.018322 [1,0]:INFO:root:Epoch[233] Batch[1100] Loss[2.139] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[1100] rmse=0.018200 lr=0.018243 [1,0]:INFO:root:Epoch[233] Batch[1200] Loss[2.361] [1,0]:INFO:root:Epoch[233] Rank[0] Batch[1200] rmse=0.018204 lr=0.018164 [1,0]:INFO:root:Epoch[233] Rank[0] Batch[1251] Time cost=399.16 Train-metric=0.018199 [1,0]:INFO:root:Epoch[233] Speed: 3209.29 samples/sec [1,0]:INFO:root:Epoch[234] Batch[100] Loss[2.418] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[100] rmse=0.018117 lr=0.018045 [1,0]:INFO:root:Epoch[234] Batch[200] Loss[3.945] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[200] rmse=0.018101 lr=0.017966 [1,0]:INFO:root:Epoch[234] Batch[300] Loss[4.423] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[300] rmse=0.018143 lr=0.017888 [1,0]:INFO:root:Epoch[234] Batch[400] Loss[3.033] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[400] rmse=0.018147 lr=0.017810 [1,0]:INFO:root:Epoch[234] Batch[500] Loss[2.512] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[500] rmse=0.018134 lr=0.017732 [1,0]:INFO:root:Epoch[234] Batch[600] Loss[2.789] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[600] rmse=0.018154 lr=0.017654 [1,0]:INFO:root:Epoch[234] Batch[700] Loss[2.322] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[700] rmse=0.018172 lr=0.017576 [1,0]:INFO:root:Epoch[234] Batch[800] Loss[2.351] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[800] rmse=0.018176 lr=0.017498 [1,0]:INFO:root:Epoch[234] Batch[900] Loss[4.293] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[900] rmse=0.018197 lr=0.017421 [1,0]:INFO:root:Epoch[234] Batch[1000] Loss[2.262] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[1000] rmse=0.018206 lr=0.017344 [1,0]:INFO:root:Epoch[234] Batch[1100] Loss[2.305] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[1100] rmse=0.018201 lr=0.017267 [1,0]:INFO:root:Epoch[234] Batch[1200] Loss[2.811] [1,0]:INFO:root:Epoch[234] Rank[0] Batch[1200] rmse=0.018208 lr=0.017190 [1,0]:INFO:root:Epoch[234] Rank[0] Batch[1251] Time cost=399.51 Train-metric=0.018198 [1,0]:INFO:root:Epoch[234] Speed: 3206.48 samples/sec [1,0]:INFO:root:Epoch[234] Rank[0] Validation-accuracy=0.734900 Validation-top_k_accuracy_5=0.915180 [1,0]:INFO:root:Epoch[235] Batch[100] Loss[2.991] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[100] rmse=0.018036 lr=0.017074 [1,0]:INFO:root:Epoch[235] Batch[200] Loss[2.623] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[200] rmse=0.018189 lr=0.016998 [1,0]:INFO:root:Epoch[235] Batch[300] Loss[2.968] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[300] rmse=0.018169 lr=0.016921 [1,0]:INFO:root:Epoch[235] Batch[400] Loss[2.585] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[400] rmse=0.018138 lr=0.016845 [1,0]:INFO:root:Epoch[235] Batch[500] Loss[2.554] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[500] rmse=0.018133 lr=0.016769 [1,0]:INFO:root:Epoch[235] Batch[600] Loss[2.602] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[600] rmse=0.018111 lr=0.016693 [1,0]:INFO:root:Epoch[235] Batch[700] Loss[2.927] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[700] rmse=0.018119 lr=0.016618 [1,0]:INFO:root:Epoch[235] Batch[800] Loss[2.169] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[800] rmse=0.018133 lr=0.016542 [1,0]:INFO:root:Epoch[235] Batch[900] Loss[2.480] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[900] rmse=0.018129 lr=0.016467 [1,0]:INFO:root:Epoch[235] Batch[1000] Loss[4.923] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[1000] rmse=0.018134 lr=0.016392 [1,0]:INFO:root:Epoch[235] Batch[1100] Loss[2.419] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[1100] rmse=0.018126 lr=0.016317 [1,0]:INFO:root:Epoch[235] Batch[1200] Loss[4.569] [1,0]:INFO:root:Epoch[235] Rank[0] Batch[1200] rmse=0.018140 lr=0.016242 [1,0]:INFO:root:Epoch[235] Rank[0] Batch[1251] Time cost=398.90 Train-metric=0.018144 [1,0]:INFO:root:Epoch[235] Speed: 3211.42 samples/sec [1,0]:INFO:root:Epoch[236] Batch[100] Loss[2.374] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[100] rmse=0.018061 lr=0.016129 [1,0]:INFO:root:Epoch[236] Batch[200] Loss[2.193] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[200] rmse=0.018095 lr=0.016055 [1,0]:INFO:root:Epoch[236] Batch[300] Loss[4.856] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[300] rmse=0.018139 lr=0.015980 [1,0]:INFO:root:Epoch[236] Batch[400] Loss[2.194] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[400] rmse=0.018119 lr=0.015906 [1,0]:INFO:root:Epoch[236] Batch[500] Loss[3.484] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[500] rmse=0.018105 lr=0.015832 [1,0]:INFO:root:Epoch[236] Batch[600] Loss[2.265] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[600] rmse=0.018107 lr=0.015759 [1,0]:INFO:root:Epoch[236] Batch[700] Loss[3.747] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[700] rmse=0.018105 lr=0.015685 [1,0]:INFO:root:Epoch[236] Batch[800] Loss[2.126] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[800] rmse=0.018108 lr=0.015612 [1,0]:INFO:root:Epoch[236] Batch[900] Loss[3.078] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[900] rmse=0.018113 lr=0.015538 [1,0]:INFO:root:Epoch[236] Batch[1000] Loss[2.314] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[1000] rmse=0.018120 lr=0.015465 [1,0]:INFO:root:Epoch[236] Batch[1100] Loss[3.123] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[1100] rmse=0.018124 lr=0.015392 [1,0]:INFO:root:Epoch[236] Batch[1200] Loss[4.294] [1,0]:INFO:root:Epoch[236] Rank[0] Batch[1200] rmse=0.018121 lr=0.015320 [1,0]:INFO:root:Epoch[236] Rank[0] Batch[1251] Time cost=399.14 Train-metric=0.018127 [1,0]:INFO:root:Epoch[236] Speed: 3209.44 samples/sec [1,0]:INFO:root:Epoch[237] Batch[100] Loss[2.633] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[100] rmse=0.017955 lr=0.015210 [1,0]:INFO:root:Epoch[237] Batch[200] Loss[2.295] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[200] rmse=0.018030 lr=0.015138 [1,0]:INFO:root:Epoch[237] Batch[300] Loss[3.551] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[300] rmse=0.018050 lr=0.015065 [1,0]:INFO:root:Epoch[237] Batch[400] Loss[2.163] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[400] rmse=0.018036 lr=0.014993 [1,0]:INFO:root:Epoch[237] Batch[500] Loss[2.257] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[500] rmse=0.018077 lr=0.014922 [1,0]:INFO:root:Epoch[237] Batch[600] Loss[2.241] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[600] rmse=0.018081 lr=0.014850 [1,0]:INFO:root:Epoch[237] Batch[700] Loss[3.879] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[700] rmse=0.018073 lr=0.014778 [1,0]:INFO:root:Epoch[237] Batch[800] Loss[2.259] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[800] rmse=0.018081 lr=0.014707 [1,0]:INFO:root:Epoch[237] Batch[900] Loss[4.666] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[900] rmse=0.018077 lr=0.014636 [1,0]:INFO:root:Epoch[237] Batch[1000] Loss[2.930] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[1000] rmse=0.018084 lr=0.014565 [1,0]:INFO:root:Epoch[237] Batch[1100] Loss[4.466] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[1100] rmse=0.018080 lr=0.014494 [1,0]:INFO:root:Epoch[237] Batch[1200] Loss[2.644] [1,0]:INFO:root:Epoch[237] Rank[0] Batch[1200] rmse=0.018075 lr=0.014423 [1,0]:INFO:root:Epoch[237] Rank[0] Batch[1251] Time cost=400.20 Train-metric=0.018082 [1,0]:INFO:root:Epoch[237] Speed: 3200.99 samples/sec [1,0]:INFO:root:Epoch[238] Batch[100] Loss[2.153] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[100] rmse=0.017962 lr=0.014317 [1,0]:INFO:root:Epoch[238] Batch[200] Loss[2.582] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[200] rmse=0.017922 lr=0.014246 [1,0]:INFO:root:Epoch[238] Batch[300] Loss[3.679] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[300] rmse=0.017980 lr=0.014176 [1,0]:INFO:root:Epoch[238] Batch[400] Loss[1.962] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[400] rmse=0.017984 lr=0.014106 [1,0]:INFO:root:Epoch[238] Batch[500] Loss[2.659] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[500] rmse=0.018000 lr=0.014037 [1,0]:INFO:root:Epoch[238] Batch[600] Loss[4.465] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[600] rmse=0.018002 lr=0.013967 [1,0]:INFO:root:Epoch[238] Batch[700] Loss[2.143] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[700] rmse=0.018031 lr=0.013898 [1,0]:INFO:root:Epoch[238] Batch[800] Loss[3.870] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[800] rmse=0.018039 lr=0.013828 [1,0]:INFO:root:Epoch[238] Batch[900] Loss[2.426] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[900] rmse=0.018035 lr=0.013759 [1,0]:INFO:root:Epoch[238] Batch[1000] Loss[2.663] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[1000] rmse=0.018038 lr=0.013690 [1,0]:INFO:root:Epoch[238] Batch[1100] Loss[2.316] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[1100] rmse=0.018023 lr=0.013621 [1,0]:INFO:root:Epoch[238] Batch[1200] Loss[2.081] [1,0]:INFO:root:Epoch[238] Rank[0] Batch[1200] rmse=0.018033 lr=0.013553 [1,0]:INFO:root:Epoch[238] Rank[0] Batch[1251] Time cost=399.42 Train-metric=0.018037 [1,0]:INFO:root:Epoch[238] Speed: 3207.20 samples/sec [1,0]:INFO:root:Epoch[239] Batch[100] Loss[2.695] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[100] rmse=0.017806 lr=0.013450 [1,0]:INFO:root:Epoch[239] Batch[200] Loss[3.960] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[200] rmse=0.017864 lr=0.013381 [1,0]:INFO:root:Epoch[239] Batch[300] Loss[2.395] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[300] rmse=0.017905 lr=0.013313 [1,0]:INFO:root:Epoch[239] Batch[400] Loss[3.643] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[400] rmse=0.017921 lr=0.013245 [1,0]:INFO:root:Epoch[239] Batch[500] Loss[2.201] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[500] rmse=0.017956 lr=0.013178 [1,0]:INFO:root:Epoch[239] Batch[600] Loss[4.658] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[600] rmse=0.017982 lr=0.013110 [1,0]:INFO:root:Epoch[239] Batch[700] Loss[2.289] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[700] rmse=0.017991 lr=0.013043 [1,0]:INFO:root:Epoch[239] Batch[800] Loss[2.548] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[800] rmse=0.017986 lr=0.012976 [1,0]:INFO:root:Epoch[239] Batch[900] Loss[3.650] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[900] rmse=0.017989 lr=0.012909 [1,0]:INFO:root:Epoch[239] Batch[1000] Loss[2.307] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[1000] rmse=0.017974 lr=0.012842 [1,0]:INFO:root:Epoch[239] Batch[1100] Loss[4.391] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[1100] rmse=0.017976 lr=0.012775 [1,0]:INFO:root:Epoch[239] Batch[1200] Loss[2.130] [1,0]:INFO:root:Epoch[239] Rank[0] Batch[1200] rmse=0.017992 lr=0.012709 [1,0]:INFO:root:Epoch[239] Rank[0] Batch[1251] Time cost=398.72 Train-metric=0.017997 [1,0]:INFO:root:Epoch[239] Speed: 3212.81 samples/sec [1,0]:INFO:root:Epoch[239] Rank[0] Validation-accuracy=0.741840 Validation-top_k_accuracy_5=0.918000 [1,0]:INFO:root:Epoch[240] Batch[100] Loss[2.392] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[100] rmse=0.017917 lr=0.012609 [1,0]:INFO:root:Epoch[240] Batch[200] Loss[2.570] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[200] rmse=0.017938 lr=0.012542 [1,0]:INFO:root:Epoch[240] Batch[300] Loss[2.310] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[300] rmse=0.017920 lr=0.012477 [1,0]:INFO:root:Epoch[240] Batch[400] Loss[2.254] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[400] rmse=0.017909 lr=0.012411 [1,0]:INFO:root:Epoch[240] Batch[500] Loss[2.271] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[500] rmse=0.017942 lr=0.012345 [1,0]:INFO:root:Epoch[240] Batch[600] Loss[2.336] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[600] rmse=0.017938 lr=0.012280 [1,0]:INFO:root:Epoch[240] Batch[700] Loss[2.321] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[700] rmse=0.017962 lr=0.012215 [1,0]:INFO:root:Epoch[240] Batch[800] Loss[2.175] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[800] rmse=0.017963 lr=0.012149 [1,0]:INFO:root:Epoch[240] Batch[900] Loss[2.155] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[900] rmse=0.017975 lr=0.012085 [1,0]:INFO:root:Epoch[240] Batch[1000] Loss[3.330] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[1000] rmse=0.017988 lr=0.012020 [1,0]:INFO:root:Epoch[240] Batch[1100] Loss[4.544] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[1100] rmse=0.017981 lr=0.011955 [1,0]:INFO:root:Epoch[240] Batch[1200] Loss[2.203] [1,0]:INFO:root:Epoch[240] Rank[0] Batch[1200] rmse=0.017977 lr=0.011891 [1,0]:INFO:root:Epoch[240] Rank[0] Batch[1251] Time cost=397.51 Train-metric=0.017970 [1,0]:INFO:root:Epoch[240] Speed: 3222.64 samples/sec [1,0]:INFO:root:Epoch[241] Batch[100] Loss[4.880] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[100] rmse=0.017921 lr=0.011794 [1,0]:INFO:root:Epoch[241] Batch[200] Loss[1.931] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[200] rmse=0.017940 lr=0.011730 [1,0]:INFO:root:Epoch[241] Batch[300] Loss[2.959] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[300] rmse=0.017949 lr=0.011666 [1,0]:INFO:root:Epoch[241] Batch[400] Loss[4.788] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[400] rmse=0.017967 lr=0.011602 [1,0]:INFO:root:Epoch[241] Batch[500] Loss[2.046] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[500] rmse=0.017943 lr=0.011539 [1,0]:INFO:root:Epoch[241] Batch[600] Loss[2.458] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[600] rmse=0.017948 lr=0.011476 [1,0]:INFO:root:Epoch[241] Batch[700] Loss[2.283] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[700] rmse=0.017929 lr=0.011412 [1,0]:INFO:root:Epoch[241] Batch[800] Loss[2.146] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[800] rmse=0.017937 lr=0.011349 [1,0]:INFO:root:Epoch[241] Batch[900] Loss[2.445] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[900] rmse=0.017927 lr=0.011287 [1,0]:INFO:root:Epoch[241] Batch[1000] Loss[2.792] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[1000] rmse=0.017926 lr=0.011224 [1,0]:INFO:root:Epoch[241] Batch[1100] Loss[3.178] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[1100] rmse=0.017929 lr=0.011162 [1,0]:INFO:root:Epoch[241] Batch[1200] Loss[2.021] [1,0]:INFO:root:Epoch[241] Rank[0] Batch[1200] rmse=0.017925 lr=0.011099 [1,0]:INFO:root:Epoch[241] Rank[0] Batch[1251] Time cost=399.30 Train-metric=0.017927 [1,0]:INFO:root:Epoch[241] Speed: 3208.19 samples/sec [1,0]:INFO:root:Epoch[242] Batch[100] Loss[2.023] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[100] rmse=0.017846 lr=0.011006 [1,0]:INFO:root:Epoch[242] Batch[200] Loss[2.332] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[200] rmse=0.017845 lr=0.010944 [1,0]:INFO:root:Epoch[242] Batch[300] Loss[4.760] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[300] rmse=0.017855 lr=0.010882 [1,0]:INFO:root:Epoch[242] Batch[400] Loss[2.234] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[400] rmse=0.017844 lr=0.010820 [1,0]:INFO:root:Epoch[242] Batch[500] Loss[4.475] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[500] rmse=0.017858 lr=0.010759 [1,0]:INFO:root:Epoch[242] Batch[600] Loss[3.502] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[600] rmse=0.017867 lr=0.010698 [1,0]:INFO:root:Epoch[242] Batch[700] Loss[4.662] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[700] rmse=0.017884 lr=0.010637 [1,0]:INFO:root:Epoch[242] Batch[800] Loss[2.128] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[800] rmse=0.017883 lr=0.010576 [1,0]:INFO:root:Epoch[242] Batch[900] Loss[2.270] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[900] rmse=0.017894 lr=0.010515 [1,0]:INFO:root:Epoch[242] Batch[1000] Loss[2.407] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[1000] rmse=0.017910 lr=0.010455 [1,0]:INFO:root:Epoch[242] Batch[1100] Loss[3.488] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[1100] rmse=0.017904 lr=0.010394 [1,0]:INFO:root:Epoch[242] Batch[1200] Loss[2.221] [1,0]:INFO:root:Epoch[242] Rank[0] Batch[1200] rmse=0.017905 lr=0.010334 [1,0]:INFO:root:Epoch[242] Rank[0] Batch[1251] Time cost=399.94 Train-metric=0.017897 [1,0]:INFO:root:Epoch[242] Speed: 3203.08 samples/sec [1,0]:INFO:root:Epoch[243] Batch[100] Loss[2.283] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[100] rmse=0.017951 lr=0.010244 [1,0]:INFO:root:Epoch[243] Batch[200] Loss[2.223] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[200] rmse=0.017888 lr=0.010184 [1,0]:INFO:root:Epoch[243] Batch[300] Loss[4.650] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[300] rmse=0.017893 lr=0.010124 [1,0]:INFO:root:Epoch[243] Batch[400] Loss[2.169] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[400] rmse=0.017905 lr=0.010065 [1,0]:INFO:root:Epoch[243] Batch[500] Loss[4.900] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[500] rmse=0.017869 lr=0.010006 [1,0]:INFO:root:Epoch[243] Batch[600] Loss[3.333] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[600] rmse=0.017852 lr=0.009947 [1,0]:INFO:root:Epoch[243] Batch[700] Loss[3.512] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[700] rmse=0.017829 lr=0.009888 [1,0]:INFO:root:Epoch[243] Batch[800] Loss[2.269] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[800] rmse=0.017821 lr=0.009829 [1,0]:INFO:root:Epoch[243] Batch[900] Loss[2.306] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[900] rmse=0.017837 lr=0.009771 [1,0]:INFO:root:Epoch[243] Batch[1000] Loss[4.639] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[1000] rmse=0.017844 lr=0.009712 [1,0]:INFO:root:Epoch[243] Batch[1100] Loss[2.575] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[1100] rmse=0.017833 lr=0.009654 [1,0]:INFO:root:Epoch[243] Batch[1200] Loss[2.644] [1,0]:INFO:root:Epoch[243] Rank[0] Batch[1200] rmse=0.017834 lr=0.009596 [1,0]:INFO:root:Epoch[243] Rank[0] Batch[1251] Time cost=398.95 Train-metric=0.017839 [1,0]:INFO:root:Epoch[243] Speed: 3211.01 samples/sec [1,0]:INFO:root:Epoch[244] Batch[100] Loss[2.411] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[100] rmse=0.017812 lr=0.009509 [1,0]:INFO:root:Epoch[244] Batch[200] Loss[4.755] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[200] rmse=0.017844 lr=0.009451 [1,0]:INFO:root:Epoch[244] Batch[300] Loss[2.132] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[300] rmse=0.017815 lr=0.009394 [1,0]:INFO:root:Epoch[244] Batch[400] Loss[4.749] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[400] rmse=0.017839 lr=0.009336 [1,0]:INFO:root:Epoch[244] Batch[500] Loss[4.510] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[500] rmse=0.017833 lr=0.009279 [1,0]:INFO:root:Epoch[244] Batch[600] Loss[2.358] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[600] rmse=0.017859 lr=0.009222 [1,0]:INFO:root:Epoch[244] Batch[700] Loss[2.114] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[700] rmse=0.017841 lr=0.009165 [1,0]:INFO:root:Epoch[244] Batch[800] Loss[4.755] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[800] rmse=0.017839 lr=0.009109 [1,0]:INFO:root:Epoch[244] Batch[900] Loss[2.244] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[900] rmse=0.017844 lr=0.009052 [1,0]:INFO:root:Epoch[244] Batch[1000] Loss[2.814] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[1000] rmse=0.017856 lr=0.008996 [1,0]:INFO:root:Epoch[244] Batch[1100] Loss[2.736] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[1100] rmse=0.017847 lr=0.008940 [1,0]:INFO:root:Epoch[244] Batch[1200] Loss[2.093] [1,0]:INFO:root:Epoch[244] Rank[0] Batch[1200] rmse=0.017842 lr=0.008884 [1,0]:INFO:root:Epoch[244] Rank[0] Batch[1251] Time cost=400.08 Train-metric=0.017843 [1,0]:INFO:root:Epoch[244] Speed: 3201.95 samples/sec [1,0]:INFO:root:Epoch[244] Rank[0] Validation-accuracy=0.747260 Validation-top_k_accuracy_5=0.920760 [1,0]:INFO:root:Epoch[245] Batch[100] Loss[2.231] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[100] rmse=0.017810 lr=0.008800 [1,0]:INFO:root:Epoch[245] Batch[200] Loss[3.150] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[200] rmse=0.017774 lr=0.008745 [1,0]:INFO:root:Epoch[245] Batch[300] Loss[4.761] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[300] rmse=0.017747 lr=0.008689 [1,0]:INFO:root:Epoch[245] Batch[400] Loss[3.325] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[400] rmse=0.017724 lr=0.008634 [1,0]:INFO:root:Epoch[245] Batch[500] Loss[2.245] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[500] rmse=0.017713 lr=0.008579 [1,0]:INFO:root:Epoch[245] Batch[600] Loss[2.958] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[600] rmse=0.017735 lr=0.008525 [1,0]:INFO:root:Epoch[245] Batch[700] Loss[2.488] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[700] rmse=0.017746 lr=0.008470 [1,0]:INFO:root:Epoch[245] Batch[800] Loss[2.159] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[800] rmse=0.017759 lr=0.008415 [1,0]:INFO:root:Epoch[245] Batch[900] Loss[2.176] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[900] rmse=0.017742 lr=0.008361 [1,0]:INFO:root:Epoch[245] Batch[1000] Loss[2.207] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[1000] rmse=0.017756 lr=0.008307 [1,0]:INFO:root:Epoch[245] Batch[1100] Loss[2.223] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[1100] rmse=0.017766 lr=0.008253 [1,0]:INFO:root:Epoch[245] Batch[1200] Loss[4.611] [1,0]:INFO:root:Epoch[245] Rank[0] Batch[1200] rmse=0.017770 lr=0.008199 [1,0]:INFO:root:Epoch[245] Rank[0] Batch[1251] Time cost=398.41 Train-metric=0.017779 [1,0]:INFO:root:Epoch[245] Speed: 3215.33 samples/sec [1,0]:INFO:root:Epoch[246] Batch[100] Loss[2.218] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[100] rmse=0.017727 lr=0.008119 [1,0]:INFO:root:Epoch[246] Batch[200] Loss[3.928] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[200] rmse=0.017739 lr=0.008065 [1,0]:INFO:root:Epoch[246] Batch[300] Loss[2.328] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[300] rmse=0.017768 lr=0.008012 [1,0]:INFO:root:Epoch[246] Batch[400] Loss[2.820] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[400] rmse=0.017732 lr=0.007959 [1,0]:INFO:root:Epoch[246] Batch[500] Loss[4.676] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[500] rmse=0.017719 lr=0.007906 [1,0]:INFO:root:Epoch[246] Batch[600] Loss[4.174] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[600] rmse=0.017723 lr=0.007854 [1,0]:INFO:root:Epoch[246] Batch[700] Loss[2.102] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[700] rmse=0.017735 lr=0.007801 [1,0]:INFO:root:Epoch[246] Batch[800] Loss[2.479] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[800] rmse=0.017733 lr=0.007749 [1,0]:INFO:root:Epoch[246] Batch[900] Loss[4.796] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[900] rmse=0.017713 lr=0.007697 [1,0]:INFO:root:Epoch[246] Batch[1000] Loss[4.075] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[1000] rmse=0.017721 lr=0.007645 [1,0]:INFO:root:Epoch[246] Batch[1100] Loss[2.530] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[1100] rmse=0.017725 lr=0.007593 [1,0]:INFO:root:Epoch[246] Batch[1200] Loss[4.610] [1,0]:INFO:root:Epoch[246] Rank[0] Batch[1200] rmse=0.017736 lr=0.007542 [1,0]:INFO:root:Epoch[246] Rank[0] Batch[1251] Time cost=398.93 Train-metric=0.017733 [1,0]:INFO:root:Epoch[246] Speed: 3211.16 samples/sec [1,0]:INFO:root:Epoch[247] Batch[100] Loss[2.247] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[100] rmse=0.017661 lr=0.007464 [1,0]:INFO:root:Epoch[247] Batch[200] Loss[1.992] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[200] rmse=0.017692 lr=0.007413 [1,0]:INFO:root:Epoch[247] Batch[300] Loss[2.150] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[300] rmse=0.017697 lr=0.007362 [1,0]:INFO:root:Epoch[247] Batch[400] Loss[2.466] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[400] rmse=0.017682 lr=0.007311 [1,0]:INFO:root:Epoch[247] Batch[500] Loss[4.202] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[500] rmse=0.017677 lr=0.007260 [1,0]:INFO:root:Epoch[247] Batch[600] Loss[2.360] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[600] rmse=0.017685 lr=0.007210 [1,0]:INFO:root:Epoch[247] Batch[700] Loss[4.814] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[700] rmse=0.017687 lr=0.007159 [1,0]:INFO:root:Epoch[247] Batch[800] Loss[2.605] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[800] rmse=0.017693 lr=0.007109 [1,0]:INFO:root:Epoch[247] Batch[900] Loss[4.484] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[900] rmse=0.017699 lr=0.007059 [1,0]:INFO:root:Epoch[247] Batch[1000] Loss[2.270] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[1000] rmse=0.017705 lr=0.007010 [1,0]:INFO:root:Epoch[247] Batch[1100] Loss[3.118] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[1100] rmse=0.017699 lr=0.006960 [1,0]:INFO:root:Epoch[247] Batch[1200] Loss[2.338] [1,0]:INFO:root:Epoch[247] Rank[0] Batch[1200] rmse=0.017707 lr=0.006911 [1,0]:INFO:root:Epoch[247] Rank[0] Batch[1251] Time cost=399.13 Train-metric=0.017713 [1,0]:INFO:root:Epoch[247] Speed: 3209.56 samples/sec [1,0]:INFO:root:Epoch[248] Batch[100] Loss[2.172] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[100] rmse=0.017550 lr=0.006836 [1,0]:INFO:root:Epoch[248] Batch[200] Loss[4.440] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[200] rmse=0.017566 lr=0.006787 [1,0]:INFO:root:Epoch[248] Batch[300] Loss[3.036] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[300] rmse=0.017614 lr=0.006738 [1,0]:INFO:root:Epoch[248] Batch[400] Loss[3.230] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[400] rmse=0.017630 lr=0.006690 [1,0]:INFO:root:Epoch[248] Batch[500] Loss[2.205] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[500] rmse=0.017631 lr=0.006641 [1,0]:INFO:root:Epoch[248] Batch[600] Loss[3.973] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[600] rmse=0.017636 lr=0.006593 [1,0]:INFO:root:Epoch[248] Batch[700] Loss[2.153] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[700] rmse=0.017637 lr=0.006545 [1,0]:INFO:root:Epoch[248] Batch[800] Loss[4.294] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[800] rmse=0.017645 lr=0.006497 [1,0]:INFO:root:Epoch[248] Batch[900] Loss[3.439] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[900] rmse=0.017639 lr=0.006449 [1,0]:INFO:root:Epoch[248] Batch[1000] Loss[2.111] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[1000] rmse=0.017647 lr=0.006401 [1,0]:INFO:root:Epoch[248] Batch[1100] Loss[2.080] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[1100] rmse=0.017664 lr=0.006354 [1,0]:INFO:root:Epoch[248] Batch[1200] Loss[2.493] [1,0]:INFO:root:Epoch[248] Rank[0] Batch[1200] rmse=0.017659 lr=0.006307 [1,0]:INFO:root:Epoch[248] Rank[0] Batch[1251] Time cost=399.93 Train-metric=0.017663 [1,0]:INFO:root:Epoch[248] Speed: 3203.10 samples/sec [1,0]:INFO:root:Epoch[249] Batch[100] Loss[4.404] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[100] rmse=0.017533 lr=0.006236 [1,0]:INFO:root:Epoch[249] Batch[200] Loss[2.216] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[200] rmse=0.017626 lr=0.006189 [1,0]:INFO:root:Epoch[249] Batch[300] Loss[2.214] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[300] rmse=0.017640 lr=0.006142 [1,0]:INFO:root:Epoch[249] Batch[400] Loss[2.097] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[400] rmse=0.017609 lr=0.006096 [1,0]:INFO:root:Epoch[249] Batch[500] Loss[2.685] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[500] rmse=0.017590 lr=0.006049 [1,0]:INFO:root:Epoch[249] Batch[600] Loss[2.119] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[600] rmse=0.017597 lr=0.006003 [1,0]:INFO:root:Epoch[249] Batch[700] Loss[2.135] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[700] rmse=0.017579 lr=0.005957 [1,0]:INFO:root:Epoch[249] Batch[800] Loss[2.133] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[800] rmse=0.017610 lr=0.005912 [1,0]:INFO:root:Epoch[249] Batch[900] Loss[2.271] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[900] rmse=0.017622 lr=0.005866 [1,0]:INFO:root:Epoch[249] Batch[1000] Loss[4.339] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[1000] rmse=0.017630 lr=0.005820 [1,0]:INFO:root:Epoch[249] Batch[1100] Loss[2.167] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[1100] rmse=0.017632 lr=0.005775 [1,0]:INFO:root:Epoch[249] Batch[1200] Loss[2.891] [1,0]:INFO:root:Epoch[249] Rank[0] Batch[1200] rmse=0.017637 lr=0.005730 [1,0]:INFO:root:Epoch[249] Rank[0] Batch[1251] Time cost=399.26 Train-metric=0.017637 [1,0]:INFO:root:Epoch[249] Speed: 3208.50 samples/sec [1,0]:INFO:root:Epoch[249] Rank[0] Validation-accuracy=0.751920 Validation-top_k_accuracy_5=0.923380 [1,0]:INFO:root:Epoch[250] Batch[100] Loss[3.357] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[100] rmse=0.017636 lr=0.005662 [1,0]:INFO:root:Epoch[250] Batch[200] Loss[4.455] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[200] rmse=0.017598 lr=0.005618 [1,0]:INFO:root:Epoch[250] Batch[300] Loss[2.265] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[300] rmse=0.017598 lr=0.005573 [1,0]:INFO:root:Epoch[250] Batch[400] Loss[3.628] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[400] rmse=0.017599 lr=0.005529 [1,0]:INFO:root:Epoch[250] Batch[500] Loss[4.510] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[500] rmse=0.017622 lr=0.005485 [1,0]:INFO:root:Epoch[250] Batch[600] Loss[1.937] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[600] rmse=0.017586 lr=0.005441 [1,0]:INFO:root:Epoch[250] Batch[700] Loss[2.180] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[700] rmse=0.017593 lr=0.005397 [1,0]:INFO:root:Epoch[250] Batch[800] Loss[2.234] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[800] rmse=0.017585 lr=0.005353 [1,0]:INFO:root:Epoch[250] Batch[900] Loss[2.269] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[900] rmse=0.017566 lr=0.005310 [1,0]:INFO:root:Epoch[250] Batch[1000] Loss[2.422] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[1000] rmse=0.017573 lr=0.005267 [1,0]:INFO:root:Epoch[250] Batch[1100] Loss[2.445] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[1100] rmse=0.017562 lr=0.005224 [1,0]:INFO:root:Epoch[250] Batch[1200] Loss[4.724] [1,0]:INFO:root:Epoch[250] Rank[0] Batch[1200] rmse=0.017559 lr=0.005181 [1,0]:INFO:root:Epoch[250] Rank[0] Batch[1251] Time cost=400.41 Train-metric=0.017564 [1,0]:INFO:root:Epoch[250] Speed: 3199.26 samples/sec [1,0]:INFO:root:Epoch[251] Batch[100] Loss[2.687] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[100] rmse=0.017485 lr=0.005116 [1,0]:INFO:root:Epoch[251] Batch[200] Loss[2.708] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[200] rmse=0.017532 lr=0.005074 [1,0]:INFO:root:Epoch[251] Batch[300] Loss[2.173] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[300] rmse=0.017520 lr=0.005031 [1,0]:INFO:root:Epoch[251] Batch[400] Loss[2.181] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[400] rmse=0.017557 lr=0.004989 [1,0]:INFO:root:Epoch[251] Batch[500] Loss[4.670] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[500] rmse=0.017557 lr=0.004947 [1,0]:INFO:root:Epoch[251] Batch[600] Loss[1.892] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[600] rmse=0.017559 lr=0.004906 [1,0]:INFO:root:Epoch[251] Batch[700] Loss[2.078] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[700] rmse=0.017559 lr=0.004864 [1,0]:INFO:root:Epoch[251] Batch[800] Loss[4.783] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[800] rmse=0.017536 lr=0.004823 [1,0]:INFO:root:Epoch[251] Batch[900] Loss[2.080] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[900] rmse=0.017534 lr=0.004781 [1,0]:INFO:root:Epoch[251] Batch[1000] Loss[3.056] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[1000] rmse=0.017531 lr=0.004740 [1,0]:INFO:root:Epoch[251] Batch[1100] Loss[2.172] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[1100] rmse=0.017533 lr=0.004699 [1,0]:INFO:root:Epoch[251] Batch[1200] Loss[3.791] [1,0]:INFO:root:Epoch[251] Rank[0] Batch[1200] rmse=0.017537 lr=0.004659 [1,0]:INFO:root:Epoch[251] Rank[0] Batch[1251] Time cost=400.06 Train-metric=0.017547 [1,0]:INFO:root:Epoch[251] Speed: 3202.05 samples/sec [1,0]:INFO:root:Epoch[252] Batch[100] Loss[1.998] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[100] rmse=0.017619 lr=0.004597 [1,0]:INFO:root:Epoch[252] Batch[200] Loss[4.450] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[200] rmse=0.017434 lr=0.004557 [1,0]:INFO:root:Epoch[252] Batch[300] Loss[2.533] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[300] rmse=0.017484 lr=0.004517 [1,0]:INFO:root:Epoch[252] Batch[400] Loss[2.207] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[400] rmse=0.017498 lr=0.004477 [1,0]:INFO:root:Epoch[252] Batch[500] Loss[2.190] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[500] rmse=0.017517 lr=0.004437 [1,0]:INFO:root:Epoch[252] Batch[600] Loss[4.264] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[600] rmse=0.017533 lr=0.004398 [1,0]:INFO:root:Epoch[252] Batch[700] Loss[2.595] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[700] rmse=0.017531 lr=0.004358 [1,0]:INFO:root:Epoch[252] Batch[800] Loss[2.702] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[800] rmse=0.017518 lr=0.004319 [1,0]:INFO:root:Epoch[252] Batch[900] Loss[2.370] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[900] rmse=0.017522 lr=0.004280 [1,0]:INFO:root:Epoch[252] Batch[1000] Loss[2.947] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[1000] rmse=0.017529 lr=0.004241 [1,0]:INFO:root:Epoch[252] Batch[1100] Loss[2.239] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[1100] rmse=0.017533 lr=0.004203 [1,0]:INFO:root:Epoch[252] Batch[1200] Loss[3.157] [1,0]:INFO:root:Epoch[252] Rank[0] Batch[1200] rmse=0.017532 lr=0.004164 [1,0]:INFO:root:Epoch[252] Rank[0] Batch[1251] Time cost=401.33 Train-metric=0.017525 [1,0]:INFO:root:Epoch[252] Speed: 3191.91 samples/sec [1,0]:INFO:root:Epoch[253] Batch[100] Loss[2.616] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[100] rmse=0.017512 lr=0.004106 [1,0]:INFO:root:Epoch[253] Batch[200] Loss[1.867] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[200] rmse=0.017495 lr=0.004068 [1,0]:INFO:root:Epoch[253] Batch[300] Loss[2.218] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[300] rmse=0.017515 lr=0.004030 [1,0]:INFO:root:Epoch[253] Batch[400] Loss[2.543] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[400] rmse=0.017521 lr=0.003992 [1,0]:INFO:root:Epoch[253] Batch[500] Loss[2.130] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[500] rmse=0.017537 lr=0.003955 [1,0]:INFO:root:Epoch[253] Batch[600] Loss[2.085] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[600] rmse=0.017512 lr=0.003917 [1,0]:INFO:root:Epoch[253] Batch[700] Loss[4.058] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[700] rmse=0.017501 lr=0.003880 [1,0]:INFO:root:Epoch[253] Batch[800] Loss[2.433] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[800] rmse=0.017507 lr=0.003843 [1,0]:INFO:root:Epoch[253] Batch[900] Loss[2.212] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[900] rmse=0.017505 lr=0.003806 [1,0]:INFO:root:Epoch[253] Batch[1000] Loss[1.956] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[1000] rmse=0.017506 lr=0.003770 [1,0]:INFO:root:Epoch[253] Batch[1100] Loss[2.147] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[1100] rmse=0.017506 lr=0.003733 [1,0]:INFO:root:Epoch[253] Batch[1200] Loss[2.018] [1,0]:INFO:root:Epoch[253] Rank[0] Batch[1200] rmse=0.017508 lr=0.003697 [1,0]:INFO:root:Epoch[253] Rank[0] Batch[1251] Time cost=401.63 Train-metric=0.017511 [1,0]:INFO:root:Epoch[253] Speed: 3189.55 samples/sec [1,0]:INFO:root:Epoch[254] Batch[100] Loss[4.449] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[100] rmse=0.017384 lr=0.003642 [1,0]:INFO:root:Epoch[254] Batch[200] Loss[1.913] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[200] rmse=0.017402 lr=0.003606 [1,0]:INFO:root:Epoch[254] Batch[300] Loss[2.217] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[300] rmse=0.017387 lr=0.003571 [1,0]:INFO:root:Epoch[254] Batch[400] Loss[2.424] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[400] rmse=0.017376 lr=0.003535 [1,0]:INFO:root:Epoch[254] Batch[500] Loss[2.025] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[500] rmse=0.017368 lr=0.003500 [1,0]:INFO:root:Epoch[254] Batch[600] Loss[1.999] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[600] rmse=0.017364 lr=0.003465 [1,0]:INFO:root:Epoch[254] Batch[700] Loss[2.673] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[700] rmse=0.017388 lr=0.003430 [1,0]:INFO:root:Epoch[254] Batch[800] Loss[2.709] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[800] rmse=0.017401 lr=0.003395 [1,0]:INFO:root:Epoch[254] Batch[900] Loss[2.377] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[900] rmse=0.017408 lr=0.003360 [1,0]:INFO:root:Epoch[254] Batch[1000] Loss[4.696] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[1000] rmse=0.017404 lr=0.003326 [1,0]:INFO:root:Epoch[254] Batch[1100] Loss[2.360] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[1100] rmse=0.017409 lr=0.003291 [1,0]:INFO:root:Epoch[254] Batch[1200] Loss[4.644] [1,0]:INFO:root:Epoch[254] Rank[0] Batch[1200] rmse=0.017422 lr=0.003257 [1,0]:INFO:root:Epoch[254] Rank[0] Batch[1251] Time cost=400.29 Train-metric=0.017430 [1,0]:INFO:root:Epoch[254] Speed: 3200.23 samples/sec [1,0]:INFO:root:Epoch[254] Rank[0] Validation-accuracy=0.754500 Validation-top_k_accuracy_5=0.925100 [1,0]:INFO:root:Epoch[255] Batch[100] Loss[2.094] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[100] rmse=0.017456 lr=0.003206 [1,0]:INFO:root:Epoch[255] Batch[200] Loss[2.480] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[200] rmse=0.017391 lr=0.003172 [1,0]:INFO:root:Epoch[255] Batch[300] Loss[4.364] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[300] rmse=0.017423 lr=0.003139 [1,0]:INFO:root:Epoch[255] Batch[400] Loss[3.275] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[400] rmse=0.017402 lr=0.003105 [1,0]:INFO:root:Epoch[255] Batch[500] Loss[4.836] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[500] rmse=0.017395 lr=0.003072 [1,0]:INFO:root:Epoch[255] Batch[600] Loss[2.108] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[600] rmse=0.017406 lr=0.003039 [1,0]:INFO:root:Epoch[255] Batch[700] Loss[4.073] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[700] rmse=0.017420 lr=0.003007 [1,0]:INFO:root:Epoch[255] Batch[800] Loss[2.939] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[800] rmse=0.017419 lr=0.002974 [1,0]:INFO:root:Epoch[255] Batch[900] Loss[2.079] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[900] rmse=0.017411 lr=0.002941 [1,0]:INFO:root:Epoch[255] Batch[1000] Loss[2.255] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[1000] rmse=0.017413 lr=0.002909 [1,0]:INFO:root:Epoch[255] Batch[1100] Loss[2.188] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[1100] rmse=0.017412 lr=0.002877 [1,0]:INFO:root:Epoch[255] Batch[1200] Loss[4.341] [1,0]:INFO:root:Epoch[255] Rank[0] Batch[1200] rmse=0.017428 lr=0.002845 [1,0]:INFO:root:Epoch[255] Rank[0] Batch[1251] Time cost=399.17 Train-metric=0.017422 [1,0]:INFO:root:Epoch[255] Speed: 3209.23 samples/sec [1,0]:INFO:root:Epoch[256] Batch[100] Loss[3.863] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[100] rmse=0.017428 lr=0.002797 [1,0]:INFO:root:Epoch[256] Batch[200] Loss[2.447] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[200] rmse=0.017327 lr=0.002766 [1,0]:INFO:root:Epoch[256] Batch[300] Loss[2.247] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[300] rmse=0.017368 lr=0.002735 [1,0]:INFO:root:Epoch[256] Batch[400] Loss[3.791] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[400] rmse=0.017392 lr=0.002703 [1,0]:INFO:root:Epoch[256] Batch[500] Loss[2.245] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[500] rmse=0.017377 lr=0.002673 [1,0]:INFO:root:Epoch[256] Batch[600] Loss[2.011] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[600] rmse=0.017365 lr=0.002642 [1,0]:INFO:root:Epoch[256] Batch[700] Loss[3.894] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[700] rmse=0.017387 lr=0.002611 [1,0]:INFO:root:Epoch[256] Batch[800] Loss[2.020] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[800] rmse=0.017387 lr=0.002581 [1,0]:INFO:root:Epoch[256] Batch[900] Loss[4.103] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[900] rmse=0.017381 lr=0.002550 [1,0]:INFO:root:Epoch[256] Batch[1000] Loss[2.391] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[1000] rmse=0.017384 lr=0.002520 [1,0]:INFO:root:Epoch[256] Batch[1100] Loss[4.746] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[1100] rmse=0.017403 lr=0.002491 [1,0]:INFO:root:Epoch[256] Batch[1200] Loss[2.301] [1,0]:INFO:root:Epoch[256] Rank[0] Batch[1200] rmse=0.017408 lr=0.002461 [1,0]:INFO:root:Epoch[256] Rank[0] Batch[1251] Time cost=399.15 Train-metric=0.017400 [1,0]:INFO:root:Epoch[256] Speed: 3209.37 samples/sec [1,0]:INFO:root:Epoch[257] Batch[100] Loss[4.748] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[100] rmse=0.017307 lr=0.002416 [1,0]:INFO:root:Epoch[257] Batch[200] Loss[2.383] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[200] rmse=0.017301 lr=0.002387 [1,0]:INFO:root:Epoch[257] Batch[300] Loss[4.270] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[300] rmse=0.017301 lr=0.002358 [1,0]:INFO:root:Epoch[257] Batch[400] Loss[2.145] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[400] rmse=0.017305 lr=0.002329 [1,0]:INFO:root:Epoch[257] Batch[500] Loss[4.488] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[500] rmse=0.017320 lr=0.002300 [1,0]:INFO:root:Epoch[257] Batch[600] Loss[2.165] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[600] rmse=0.017335 lr=0.002272 [1,0]:INFO:root:Epoch[257] Batch[700] Loss[2.474] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[700] rmse=0.017326 lr=0.002243 [1,0]:INFO:root:Epoch[257] Batch[800] Loss[3.655] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[800] rmse=0.017341 lr=0.002215 [1,0]:INFO:root:Epoch[257] Batch[900] Loss[1.894] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[900] rmse=0.017335 lr=0.002187 [1,0]:INFO:root:Epoch[257] Batch[1000] Loss[2.141] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[1000] rmse=0.017346 lr=0.002159 [1,0]:INFO:root:Epoch[257] Batch[1100] Loss[2.954] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[1100] rmse=0.017352 lr=0.002132 [1,0]:INFO:root:Epoch[257] Batch[1200] Loss[2.627] [1,0]:INFO:root:Epoch[257] Rank[0] Batch[1200] rmse=0.017363 lr=0.002104 [1,0]:INFO:root:Epoch[257] Rank[0] Batch[1251] Time cost=399.89 Train-metric=0.017368 [1,0]:INFO:root:Epoch[257] Speed: 3203.48 samples/sec [1,0]:INFO:root:Epoch[258] Batch[100] Loss[2.312] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[100] rmse=0.017211 lr=0.002063 [1,0]:INFO:root:Epoch[258] Batch[200] Loss[2.771] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[200] rmse=0.017263 lr=0.002036 [1,0]:INFO:root:Epoch[258] Batch[300] Loss[2.075] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[300] rmse=0.017278 lr=0.002009 [1,0]:INFO:root:Epoch[258] Batch[400] Loss[4.227] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[400] rmse=0.017323 lr=0.001982 [1,0]:INFO:root:Epoch[258] Batch[500] Loss[2.260] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[500] rmse=0.017350 lr=0.001956 [1,0]:INFO:root:Epoch[258] Batch[600] Loss[2.383] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[600] rmse=0.017344 lr=0.001930 [1,0]:INFO:root:Epoch[258] Batch[700] Loss[4.011] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[700] rmse=0.017344 lr=0.001903 [1,0]:INFO:root:Epoch[258] Batch[800] Loss[3.939] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[800] rmse=0.017341 lr=0.001877 [1,0]:INFO:root:Epoch[258] Batch[900] Loss[4.151] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[900] rmse=0.017349 lr=0.001852 [1,0]:INFO:root:Epoch[258] Batch[1000] Loss[1.833] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[1000] rmse=0.017348 lr=0.001826 [1,0]:INFO:root:Epoch[258] Batch[1100] Loss[1.869] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[1100] rmse=0.017333 lr=0.001801 [1,0]:INFO:root:Epoch[258] Batch[1200] Loss[2.003] [1,0]:INFO:root:Epoch[258] Rank[0] Batch[1200] rmse=0.017336 lr=0.001775 [1,0]:INFO:root:Epoch[258] Rank[0] Batch[1251] Time cost=399.96 Train-metric=0.017339 [1,0]:INFO:root:Epoch[258] Speed: 3202.91 samples/sec [1,0]:INFO:root:Epoch[259] Batch[100] Loss[1.965] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[100] rmse=0.017234 lr=0.001738 [1,0]:INFO:root:Epoch[259] Batch[200] Loss[1.967] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[200] rmse=0.017191 lr=0.001713 [1,0]:INFO:root:Epoch[259] Batch[300] Loss[2.380] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[300] rmse=0.017305 lr=0.001688 [1,0]:INFO:root:Epoch[259] Batch[400] Loss[2.081] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[400] rmse=0.017319 lr=0.001664 [1,0]:INFO:root:Epoch[259] Batch[500] Loss[4.133] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[500] rmse=0.017342 lr=0.001639 [1,0]:INFO:root:Epoch[259] Batch[600] Loss[3.087] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[600] rmse=0.017341 lr=0.001615 [1,0]:INFO:root:Epoch[259] Batch[700] Loss[2.256] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[700] rmse=0.017358 lr=0.001591 [1,0]:INFO:root:Epoch[259] Batch[800] Loss[2.064] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[800] rmse=0.017346 lr=0.001568 [1,0]:INFO:root:Epoch[259] Batch[900] Loss[2.154] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[900] rmse=0.017342 lr=0.001544 [1,0]:INFO:root:Epoch[259] Batch[1000] Loss[1.936] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[1000] rmse=0.017329 lr=0.001521 [1,0]:INFO:root:Epoch[259] Batch[1100] Loss[1.959] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[1100] rmse=0.017336 lr=0.001497 [1,0]:INFO:root:Epoch[259] Batch[1200] Loss[2.141] [1,0]:INFO:root:Epoch[259] Rank[0] Batch[1200] rmse=0.017351 lr=0.001474 [1,0]:INFO:root:Epoch[259] Rank[0] Batch[1251] Time cost=399.90 Train-metric=0.017346 [1,0]:INFO:root:Epoch[259] Speed: 3203.33 samples/sec [1,0]:INFO:root:Epoch[259] Rank[0] Validation-accuracy=0.757360 Validation-top_k_accuracy_5=0.926780 [1,0]:INFO:root:Epoch[260] Batch[100] Loss[2.030] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[100] rmse=0.017196 lr=0.001440 [1,0]:INFO:root:Epoch[260] Batch[200] Loss[2.085] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[200] rmse=0.017232 lr=0.001417 [1,0]:INFO:root:Epoch[260] Batch[300] Loss[4.560] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[300] rmse=0.017251 lr=0.001395 [1,0]:INFO:root:Epoch[260] Batch[400] Loss[4.102] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[400] rmse=0.017235 lr=0.001373 [1,0]:INFO:root:Epoch[260] Batch[500] Loss[1.922] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[500] rmse=0.017240 lr=0.001351 [1,0]:INFO:root:Epoch[260] Batch[600] Loss[2.102] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[600] rmse=0.017260 lr=0.001329 [1,0]:INFO:root:Epoch[260] Batch[700] Loss[2.011] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[700] rmse=0.017252 lr=0.001307 [1,0]:INFO:root:Epoch[260] Batch[800] Loss[4.566] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[800] rmse=0.017265 lr=0.001285 [1,0]:INFO:root:Epoch[260] Batch[900] Loss[3.551] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[900] rmse=0.017264 lr=0.001264 [1,0]:INFO:root:Epoch[260] Batch[1000] Loss[2.439] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[1000] rmse=0.017280 lr=0.001243 [1,0]:INFO:root:Epoch[260] Batch[1100] Loss[4.135] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[1100] rmse=0.017279 lr=0.001222 [1,0]:INFO:root:Epoch[260] Batch[1200] Loss[2.281] [1,0]:INFO:root:Epoch[260] Rank[0] Batch[1200] rmse=0.017276 lr=0.001201 [1,0]:INFO:root:Epoch[260] Rank[0] Batch[1251] Time cost=397.17 Train-metric=0.017283 [1,0]:INFO:root:Epoch[260] Speed: 3225.39 samples/sec [1,0]:INFO:root:Epoch[261] Batch[100] Loss[2.058] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[100] rmse=0.017402 lr=0.001170 [1,0]:INFO:root:Epoch[261] Batch[200] Loss[2.228] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[200] rmse=0.017290 lr=0.001150 [1,0]:INFO:root:Epoch[261] Batch[300] Loss[3.992] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[300] rmse=0.017257 lr=0.001129 [1,0]:INFO:root:Epoch[261] Batch[400] Loss[4.443] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[400] rmse=0.017244 lr=0.001109 [1,0]:INFO:root:Epoch[261] Batch[500] Loss[2.110] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[500] rmse=0.017248 lr=0.001090 [1,0]:INFO:root:Epoch[261] Batch[600] Loss[2.419] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[600] rmse=0.017247 lr=0.001070 [1,0]:INFO:root:Epoch[261] Batch[700] Loss[1.778] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[700] rmse=0.017242 lr=0.001050 [1,0]:INFO:root:Epoch[261] Batch[800] Loss[4.466] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[800] rmse=0.017253 lr=0.001031 [1,0]:INFO:root:Epoch[261] Batch[900] Loss[2.162] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[900] rmse=0.017268 lr=0.001012 [1,0]:INFO:root:Epoch[261] Batch[1000] Loss[1.834] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[1000] rmse=0.017274 lr=0.000993 [1,0]:INFO:root:Epoch[261] Batch[1100] Loss[2.028] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[1100] rmse=0.017278 lr=0.000974 [1,0]:INFO:root:Epoch[261] Batch[1200] Loss[2.116] [1,0]:INFO:root:Epoch[261] Rank[0] Batch[1200] rmse=0.017285 lr=0.000956 [1,0]:INFO:root:Epoch[261] Rank[0] Batch[1251] Time cost=399.11 Train-metric=0.017281 [1,0]:INFO:root:Epoch[261] Speed: 3209.69 samples/sec [1,0]:INFO:root:Epoch[262] Batch[100] Loss[2.061] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[100] rmse=0.017392 lr=0.000928 [1,0]:INFO:root:Epoch[262] Batch[200] Loss[2.189] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[200] rmse=0.017346 lr=0.000910 [1,0]:INFO:root:Epoch[262] Batch[300] Loss[2.586] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[300] rmse=0.017314 lr=0.000892 [1,0]:INFO:root:Epoch[262] Batch[400] Loss[2.187] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[400] rmse=0.017283 lr=0.000874 [1,0]:INFO:root:Epoch[262] Batch[500] Loss[2.230] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[500] rmse=0.017270 lr=0.000857 [1,0]:INFO:root:Epoch[262] Batch[600] Loss[3.939] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[600] rmse=0.017289 lr=0.000839 [1,0]:INFO:root:Epoch[262] Batch[700] Loss[2.251] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[700] rmse=0.017277 lr=0.000822 [1,0]:INFO:root:Epoch[262] Batch[800] Loss[2.814] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[800] rmse=0.017263 lr=0.000805 [1,0]:INFO:root:Epoch[262] Batch[900] Loss[2.174] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[900] rmse=0.017254 lr=0.000788 [1,0]:INFO:root:Epoch[262] Batch[1000] Loss[4.750] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[1000] rmse=0.017263 lr=0.000771 [1,0]:INFO:root:Epoch[262] Batch[1100] Loss[4.676] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[1100] rmse=0.017274 lr=0.000755 [1,0]:INFO:root:Epoch[262] Batch[1200] Loss[2.178] [1,0]:INFO:root:Epoch[262] Rank[0] Batch[1200] rmse=0.017275 lr=0.000738 [1,0]:INFO:root:Epoch[262] Rank[0] Batch[1251] Time cost=399.46 Train-metric=0.017265 [1,0]:INFO:root:Epoch[262] Speed: 3206.91 samples/sec [1,0]:INFO:root:Epoch[263] Batch[100] Loss[3.285] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[100] rmse=0.017157 lr=0.000714 [1,0]:INFO:root:Epoch[263] Batch[200] Loss[2.494] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[200] rmse=0.017251 lr=0.000698 [1,0]:INFO:root:Epoch[263] Batch[300] Loss[2.724] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[300] rmse=0.017232 lr=0.000682 [1,0]:INFO:root:Epoch[263] Batch[400] Loss[2.710] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[400] rmse=0.017264 lr=0.000667 [1,0]:INFO:root:Epoch[263] Batch[500] Loss[4.703] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[500] rmse=0.017247 lr=0.000652 [1,0]:INFO:root:Epoch[263] Batch[600] Loss[2.103] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[600] rmse=0.017217 lr=0.000636 [1,0]:INFO:root:Epoch[263] Batch[700] Loss[1.953] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[700] rmse=0.017233 lr=0.000621 [1,0]:INFO:root:Epoch[263] Batch[800] Loss[1.983] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[800] rmse=0.017246 lr=0.000607 [1,0]:INFO:root:Epoch[263] Batch[900] Loss[2.076] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[900] rmse=0.017235 lr=0.000592 [1,0]:INFO:root:Epoch[263] Batch[1000] Loss[2.210] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[1000] rmse=0.017234 lr=0.000577 [1,0]:INFO:root:Epoch[263] Batch[1100] Loss[3.337] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[1100] rmse=0.017238 lr=0.000563 [1,0]:INFO:root:Epoch[263] Batch[1200] Loss[1.998] [1,0]:INFO:root:Epoch[263] Rank[0] Batch[1200] rmse=0.017236 lr=0.000549 [1,0]:INFO:root:Epoch[263] Rank[0] Batch[1251] Time cost=399.22 Train-metric=0.017238 [1,0]:INFO:root:Epoch[263] Speed: 3208.81 samples/sec [1,0]:INFO:root:Epoch[264] Batch[100] Loss[1.807] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[100] rmse=0.017111 lr=0.000528 [1,0]:INFO:root:Epoch[264] Batch[200] Loss[1.874] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[200] rmse=0.017206 lr=0.000514 [1,0]:INFO:root:Epoch[264] Batch[300] Loss[2.982] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[300] rmse=0.017207 lr=0.000501 [1,0]:INFO:root:Epoch[264] Batch[400] Loss[2.172] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[400] rmse=0.017216 lr=0.000488 [1,0]:INFO:root:Epoch[264] Batch[500] Loss[3.567] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[500] rmse=0.017199 lr=0.000474 [1,0]:INFO:root:Epoch[264] Batch[600] Loss[4.602] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[600] rmse=0.017211 lr=0.000462 [1,0]:INFO:root:Epoch[264] Batch[700] Loss[2.183] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[700] rmse=0.017221 lr=0.000449 [1,0]:INFO:root:Epoch[264] Batch[800] Loss[3.683] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[800] rmse=0.017220 lr=0.000436 [1,0]:INFO:root:Epoch[264] Batch[900] Loss[2.132] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[900] rmse=0.017213 lr=0.000424 [1,0]:INFO:root:Epoch[264] Batch[1000] Loss[1.931] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[1000] rmse=0.017227 lr=0.000412 [1,0]:INFO:root:Epoch[264] Batch[1100] Loss[2.603] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[1100] rmse=0.017243 lr=0.000399 [1,0]:INFO:root:Epoch[264] Batch[1200] Loss[2.060] [1,0]:INFO:root:Epoch[264] Rank[0] Batch[1200] rmse=0.017248 lr=0.000388 [1,0]:INFO:root:Epoch[264] Rank[0] Batch[1251] Time cost=398.85 Train-metric=0.017253 [1,0]:INFO:root:Epoch[264] Speed: 3211.76 samples/sec [1,0]:INFO:root:Epoch[264] Rank[0] Validation-accuracy=0.759560 Validation-top_k_accuracy_5=0.927940 [1,0]:INFO:root:Epoch[265] Batch[100] Loss[2.259] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[100] rmse=0.017186 lr=0.000370 [1,0]:INFO:root:Epoch[265] Batch[200] Loss[2.252] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[200] rmse=0.017275 lr=0.000359 [1,0]:INFO:root:Epoch[265] Batch[300] Loss[2.079] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[300] rmse=0.017283 lr=0.000347 [1,0]:INFO:root:Epoch[265] Batch[400] Loss[2.032] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[400] rmse=0.017280 lr=0.000336 [1,0]:INFO:root:Epoch[265] Batch[500] Loss[1.969] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[500] rmse=0.017274 lr=0.000325 [1,0]:INFO:root:Epoch[265] Batch[600] Loss[3.110] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[600] rmse=0.017254 lr=0.000315 [1,0]:INFO:root:Epoch[265] Batch[700] Loss[3.185] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[700] rmse=0.017246 lr=0.000304 [1,0]:INFO:root:Epoch[265] Batch[800] Loss[3.282] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[800] rmse=0.017252 lr=0.000294 [1,0]:INFO:root:Epoch[265] Batch[900] Loss[2.712] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[900] rmse=0.017244 lr=0.000284 [1,0]:INFO:root:Epoch[265] Batch[1000] Loss[4.557] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[1000] rmse=0.017237 lr=0.000274 [1,0]:INFO:root:Epoch[265] Batch[1100] Loss[3.566] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[1100] rmse=0.017237 lr=0.000264 [1,0]:INFO:root:Epoch[265] Batch[1200] Loss[2.197] [1,0]:INFO:root:Epoch[265] Rank[0] Batch[1200] rmse=0.017242 lr=0.000254 [1,0]:INFO:root:Epoch[265] Rank[0] Batch[1251] Time cost=398.55 Train-metric=0.017246 [1,0]:INFO:root:Epoch[265] Speed: 3214.21 samples/sec [1,0]:INFO:root:Epoch[266] Batch[100] Loss[2.410] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[100] rmse=0.017238 lr=0.000240 [1,0]:INFO:root:Epoch[266] Batch[200] Loss[3.596] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[200] rmse=0.017308 lr=0.000231 [1,0]:INFO:root:Epoch[266] Batch[300] Loss[2.095] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[300] rmse=0.017271 lr=0.000222 [1,0]:INFO:root:Epoch[266] Batch[400] Loss[3.398] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[400] rmse=0.017260 lr=0.000213 [1,0]:INFO:root:Epoch[266] Batch[500] Loss[2.831] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[500] rmse=0.017237 lr=0.000204 [1,0]:INFO:root:Epoch[266] Batch[600] Loss[4.693] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[600] rmse=0.017216 lr=0.000196 [1,0]:INFO:root:Epoch[266] Batch[700] Loss[3.093] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[700] rmse=0.017203 lr=0.000188 [1,0]:INFO:root:Epoch[266] Batch[800] Loss[2.260] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[800] rmse=0.017210 lr=0.000179 [1,0]:INFO:root:Epoch[266] Batch[900] Loss[2.040] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[900] rmse=0.017220 lr=0.000172 [1,0]:INFO:root:Epoch[266] Batch[1000] Loss[2.088] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[1000] rmse=0.017223 lr=0.000164 [1,0]:INFO:root:Epoch[266] Batch[1100] Loss[2.266] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[1100] rmse=0.017212 lr=0.000156 [1,0]:INFO:root:Epoch[266] Batch[1200] Loss[4.662] [1,0]:INFO:root:Epoch[266] Rank[0] Batch[1200] rmse=0.017205 lr=0.000149 [1,0]:INFO:root:Epoch[266] Rank[0] Batch[1251] Time cost=398.88 Train-metric=0.017203 [1,0]:INFO:root:Epoch[266] Speed: 3211.55 samples/sec [1,0]:INFO:root:Epoch[267] Batch[100] Loss[2.142] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[100] rmse=0.017330 lr=0.000138 [1,0]:INFO:root:Epoch[267] Batch[200] Loss[1.857] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[200] rmse=0.017212 lr=0.000131 [1,0]:INFO:root:Epoch[267] Batch[300] Loss[3.076] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[300] rmse=0.017172 lr=0.000124 [1,0]:INFO:root:Epoch[267] Batch[400] Loss[2.034] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[400] rmse=0.017185 lr=0.000118 [1,0]:INFO:root:Epoch[267] Batch[500] Loss[2.032] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[500] rmse=0.017182 lr=0.000111 [1,0]:INFO:root:Epoch[267] Batch[600] Loss[2.754] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[600] rmse=0.017185 lr=0.000105 [1,0]:INFO:root:Epoch[267] Batch[700] Loss[4.571] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[700] rmse=0.017193 lr=0.000099 [1,0]:INFO:root:Epoch[267] Batch[800] Loss[2.124] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[800] rmse=0.017180 lr=0.000093 [1,0]:INFO:root:Epoch[267] Batch[900] Loss[2.436] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[900] rmse=0.017173 lr=0.000087 [1,0]:INFO:root:Epoch[267] Batch[1000] Loss[3.540] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[1000] rmse=0.017188 lr=0.000082 [1,0]:INFO:root:Epoch[267] Batch[1100] Loss[3.839] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[1100] rmse=0.017187 lr=0.000077 [1,0]:INFO:root:Epoch[267] Batch[1200] Loss[2.366] [1,0]:INFO:root:Epoch[267] Rank[0] Batch[1200] rmse=0.017203 lr=0.000071 [1,0]:INFO:root:Epoch[267] Rank[0] Batch[1251] Time cost=398.63 Train-metric=0.017211 [1,0]:INFO:root:Epoch[267] Speed: 3213.53 samples/sec [1,0]:INFO:root:Epoch[268] Batch[100] Loss[2.768] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[100] rmse=0.017178 lr=0.000064 [1,0]:INFO:root:Epoch[268] Batch[200] Loss[3.630] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[200] rmse=0.017172 lr=0.000059 [1,0]:INFO:root:Epoch[268] Batch[300] Loss[2.170] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[300] rmse=0.017263 lr=0.000055 [1,0]:INFO:root:Epoch[268] Batch[400] Loss[2.091] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[400] rmse=0.017265 lr=0.000050 [1,0]:INFO:root:Epoch[268] Batch[500] Loss[3.077] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[500] rmse=0.017262 lr=0.000046 [1,0]:INFO:root:Epoch[268] Batch[600] Loss[2.144] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[600] rmse=0.017264 lr=0.000042 [1,0]:INFO:root:Epoch[268] Batch[700] Loss[2.206] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[700] rmse=0.017242 lr=0.000038 [1,0]:INFO:root:Epoch[268] Batch[800] Loss[4.643] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[800] rmse=0.017227 lr=0.000035 [1,0]:INFO:root:Epoch[268] Batch[900] Loss[2.754] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[900] rmse=0.017204 lr=0.000031 [1,0]:INFO:root:Epoch[268] Batch[1000] Loss[2.844] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[1000] rmse=0.017193 lr=0.000028 [1,0]:INFO:root:Epoch[268] Batch[1100] Loss[3.554] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[1100] rmse=0.017198 lr=0.000025 [1,0]:INFO:root:Epoch[268] Batch[1200] Loss[2.405] [1,0]:INFO:root:Epoch[268] Rank[0] Batch[1200] rmse=0.017200 lr=0.000022 [1,0]:INFO:root:Epoch[268] Rank[0] Batch[1251] Time cost=398.52 Train-metric=0.017194 [1,0]:INFO:root:Epoch[268] Speed: 3214.46 samples/sec [1,0]:INFO:root:Epoch[269] Batch[100] Loss[2.272] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[100] rmse=0.017212 lr=0.000018 [1,0]:INFO:root:Epoch[269] Batch[200] Loss[4.211] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[200] rmse=0.017199 lr=0.000016 [1,0]:INFO:root:Epoch[269] Batch[300] Loss[2.316] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[300] rmse=0.017238 lr=0.000013 [1,0]:INFO:root:Epoch[269] Batch[400] Loss[2.176] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[400] rmse=0.017195 lr=0.000011 [1,0]:INFO:root:Epoch[269] Batch[500] Loss[4.696] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[500] rmse=0.017196 lr=0.000009 [1,0]:INFO:root:Epoch[269] Batch[600] Loss[2.015] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[600] rmse=0.017202 lr=0.000008 [1,0]:INFO:root:Epoch[269] Batch[700] Loss[4.063] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[700] rmse=0.017215 lr=0.000006 [1,0]:INFO:root:Epoch[269] Batch[800] Loss[2.406] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[800] rmse=0.017213 lr=0.000005 [1,0]:INFO:root:Epoch[269] Batch[900] Loss[4.529] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[900] rmse=0.017206 lr=0.000003 [1,0]:INFO:root:Epoch[269] Batch[1000] Loss[2.900] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[1000] rmse=0.017191 lr=0.000002 [1,0]:INFO:root:Epoch[269] Batch[1100] Loss[2.220] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[1100] rmse=0.017193 lr=0.000002 [1,0]:INFO:root:Epoch[269] Batch[1200] Loss[2.029] [1,0]:INFO:root:Epoch[269] Rank[0] Batch[1200] rmse=0.017185 lr=0.000001 [1,0]:INFO:root:Epoch[269] Rank[0] Batch[1251] Time cost=399.55 Train-metric=0.017193 [1,0]:INFO:root:Epoch[269] Speed: 3206.17 samples/sec [1,0]:INFO:root:Epoch[269] Rank[0] Validation-accuracy=0.758760 Validation-top_k_accuracy_5=0.927580 [1,0]:INFO:root:Epoch[269] Rank[0] Validation-accuracy=0.758760 Validation-top_k_accuracy_5=0.927580