python train_mixup_cifar10.py --num-epochs 220 --mode hybrid --num-gpus 1 -j 2 --batch-size 128 --wd 0.0001 --lr 0.1 --lr-decay 0.1 --lr-decay-epoch 100,150 --model cifar_resnet20_v1
INFO:root:Namespace(batch_size=128, drop_rate=0.0, logging_dir='logs', lr=0.1, lr_decay=0.1, lr_decay_epoch='100,150', lr_decay_period=0, mode='hybrid', model='cifar_resnet20_v1', momentum=0.9, num_epochs=220, num_gpus=1, num_workers=2, resume_from=None, save_dir='params', save_period=10, save_plot_dir='.', wd=0.0001)
[01:53:22] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:107: Running performance tests to find the best convolution algorithm, this can take a while... (setting env variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable)
INFO:root:[Epoch 0] train=0.219350 val=0.442100 loss=1.898745 time: 15.223737
INFO:root:[Epoch 1] train=0.199065 val=0.627600 loss=1.625330 time: 14.458764
INFO:root:[Epoch 2] train=0.185608 val=0.701700 loss=1.520697 time: 14.917482
INFO:root:[Epoch 3] train=0.177612 val=0.737000 loss=1.445652 time: 14.682470
INFO:root:[Epoch 4] train=0.171849 val=0.724400 loss=1.386042 time: 14.523251
INFO:root:[Epoch 5] train=0.168969 val=0.737200 loss=1.397037 time: 15.178052
INFO:root:[Epoch 6] train=0.166638 val=0.744400 loss=1.375635 time: 14.712982
INFO:root:[Epoch 7] train=0.164888 val=0.789300 loss=1.339918 time: 14.840347
INFO:root:[Epoch 8] train=0.162454 val=0.748900 loss=1.309127 time: 14.922801
INFO:root:[Epoch 9] train=0.161351 val=0.784100 loss=1.325970 time: 14.673859
INFO:root:[Epoch 10] train=0.158938 val=0.778200 loss=1.286132 time: 15.587364
INFO:root:[Epoch 11] train=0.157640 val=0.807600 loss=1.307407 time: 15.307076
INFO:root:[Epoch 12] train=0.155163 val=0.794400 loss=1.251386 time: 14.509557
INFO:root:[Epoch 13] train=0.152488 val=0.808600 loss=1.231661 time: 15.159009
INFO:root:[Epoch 14] train=0.152606 val=0.808100 loss=1.275582 time: 15.329986
INFO:root:[Epoch 15] train=0.151925 val=0.826400 loss=1.254927 time: 15.432603
INFO:root:[Epoch 16] train=0.151685 val=0.782600 loss=1.250755 time: 14.964801
INFO:root:[Epoch 17] train=0.150352 val=0.821100 loss=1.228475 time: 14.655092
INFO:root:[Epoch 18] train=0.147183 val=0.835700 loss=1.184752 time: 14.720149
INFO:root:[Epoch 19] train=0.149419 val=0.795500 loss=1.260669 time: 14.856782
INFO:root:[Epoch 20] train=0.148903 val=0.824700 loss=1.240649 time: 15.557463
INFO:root:[Epoch 21] train=0.147480 val=0.838300 loss=1.226177 time: 15.635911
INFO:root:[Epoch 22] train=0.147238 val=0.836600 loss=1.237098 time: 15.096544
INFO:root:[Epoch 23] train=0.146946 val=0.819800 loss=1.224043 time: 15.302335
INFO:root:[Epoch 24] train=0.144647 val=0.829000 loss=1.197893 time: 15.061534
INFO:root:[Epoch 25] train=0.146886 val=0.806600 loss=1.245461 time: 14.937141
INFO:root:[Epoch 26] train=0.145617 val=0.843400 loss=1.218200 time: 14.784720
INFO:root:[Epoch 27] train=0.144473 val=0.846200 loss=1.203638 time: 14.674648
INFO:root:[Epoch 28] train=0.141772 val=0.832500 loss=1.146280 time: 14.559074
INFO:root:[Epoch 29] train=0.143297 val=0.814200 loss=1.192184 time: 14.544164
INFO:root:[Epoch 30] train=0.143076 val=0.797700 loss=1.198133 time: 14.718525
INFO:root:[Epoch 31] train=0.142333 val=0.840200 loss=1.159779 time: 14.943947
INFO:root:[Epoch 32] train=0.141020 val=0.845600 loss=1.172058 time: 15.592301
INFO:root:[Epoch 33] train=0.140950 val=0.818500 loss=1.170862 time: 14.935344
INFO:root:[Epoch 34] train=0.142620 val=0.844100 loss=1.190353 time: 15.017728
INFO:root:[Epoch 35] train=0.141449 val=0.846200 loss=1.202895 time: 14.723240
INFO:root:[Epoch 36] train=0.141153 val=0.828700 loss=1.172370 time: 14.688206
INFO:root:[Epoch 37] train=0.141211 val=0.855800 loss=1.187483 time: 15.718668
INFO:root:[Epoch 38] train=0.139878 val=0.839300 loss=1.172076 time: 14.897097
INFO:root:[Epoch 39] train=0.140615 val=0.863000 loss=1.173754 time: 14.994109
INFO:root:[Epoch 40] train=0.139838 val=0.846000 loss=1.174933 time: 15.267349
INFO:root:[Epoch 41] train=0.140122 val=0.822000 loss=1.176463 time: 14.955363
INFO:root:[Epoch 42] train=0.139249 val=0.852900 loss=1.160292 time: 14.886465
INFO:root:[Epoch 43] train=0.137015 val=0.843100 loss=1.134076 time: 15.454987
INFO:root:[Epoch 44] train=0.141302 val=0.858500 loss=1.201383 time: 14.977687
INFO:root:[Epoch 45] train=0.139248 val=0.869300 loss=1.159911 time: 14.615491
INFO:root:[Epoch 46] train=0.138907 val=0.851500 loss=1.169290 time: 14.701462
INFO:root:[Epoch 47] train=0.138631 val=0.845700 loss=1.181251 time: 14.450665
INFO:root:[Epoch 48] train=0.138244 val=0.852100 loss=1.148771 time: 15.128932
INFO:root:[Epoch 49] train=0.137554 val=0.867000 loss=1.154696 time: 14.921686
INFO:root:[Epoch 50] train=0.136665 val=0.818200 loss=1.149376 time: 14.884406
INFO:root:[Epoch 51] train=0.136812 val=0.860200 loss=1.129036 time: 14.710902
INFO:root:[Epoch 52] train=0.138098 val=0.873300 loss=1.169164 time: 15.034581
INFO:root:[Epoch 53] train=0.138172 val=0.818000 loss=1.170780 time: 14.885295
INFO:root:[Epoch 54] train=0.136145 val=0.865200 loss=1.137429 time: 14.704910
INFO:root:[Epoch 55] train=0.137889 val=0.847800 loss=1.156626 time: 14.761960
INFO:root:[Epoch 56] train=0.136864 val=0.856600 loss=1.151058 time: 14.857620
INFO:root:[Epoch 57] train=0.136618 val=0.836000 loss=1.160333 time: 14.916604
INFO:root:[Epoch 58] train=0.135771 val=0.857100 loss=1.121204 time: 14.885699
INFO:root:[Epoch 59] train=0.136440 val=0.864200 loss=1.142815 time: 15.628657
INFO:root:[Epoch 60] train=0.137859 val=0.821700 loss=1.168225 time: 14.777254
INFO:root:[Epoch 61] train=0.136676 val=0.841800 loss=1.157503 time: 14.877750
INFO:root:[Epoch 62] train=0.136321 val=0.842900 loss=1.137228 time: 14.762469
INFO:root:[Epoch 63] train=0.134788 val=0.849600 loss=1.119241 time: 14.490852
INFO:root:[Epoch 64] train=0.135282 val=0.869700 loss=1.134542 time: 15.130375
INFO:root:[Epoch 65] train=0.136527 val=0.867700 loss=1.150169 time: 14.908622
INFO:root:[Epoch 66] train=0.134519 val=0.858100 loss=1.130854 time: 14.712039
INFO:root:[Epoch 67] train=0.136827 val=0.844100 loss=1.158577 time: 14.699273
INFO:root:[Epoch 68] train=0.135455 val=0.855900 loss=1.132311 time: 15.660164
INFO:root:[Epoch 69] train=0.135362 val=0.852700 loss=1.124464 time: 14.797802
INFO:root:[Epoch 70] train=0.134512 val=0.856300 loss=1.118639 time: 15.287276
INFO:root:[Epoch 71] train=0.135706 val=0.834300 loss=1.145748 time: 14.859220
INFO:root:[Epoch 72] train=0.136513 val=0.811700 loss=1.161095 time: 15.024548
INFO:root:[Epoch 73] train=0.133925 val=0.874900 loss=1.106405 time: 15.046381
INFO:root:[Epoch 74] train=0.135169 val=0.852200 loss=1.131146 time: 14.805157
INFO:root:[Epoch 75] train=0.134158 val=0.864000 loss=1.116274 time: 15.481867
INFO:root:[Epoch 76] train=0.134361 val=0.866700 loss=1.127354 time: 14.619638
INFO:root:[Epoch 77] train=0.134855 val=0.862500 loss=1.136967 time: 14.720755
INFO:root:[Epoch 78] train=0.135446 val=0.877900 loss=1.157992 time: 14.925667
INFO:root:[Epoch 79] train=0.133425 val=0.866700 loss=1.117679 time: 14.683910
INFO:root:[Epoch 80] train=0.134064 val=0.859400 loss=1.114480 time: 15.507363
INFO:root:[Epoch 81] train=0.135033 val=0.852300 loss=1.154469 time: 14.872412
INFO:root:[Epoch 82] train=0.134516 val=0.856500 loss=1.145751 time: 15.795262
INFO:root:[Epoch 83] train=0.133398 val=0.846800 loss=1.118153 time: 14.724391
INFO:root:[Epoch 84] train=0.134477 val=0.866500 loss=1.129139 time: 14.799140
INFO:root:[Epoch 85] train=0.135176 val=0.820500 loss=1.148783 time: 14.932642
INFO:root:[Epoch 86] train=0.133087 val=0.860300 loss=1.113189 time: 14.620316
INFO:root:[Epoch 87] train=0.132656 val=0.874000 loss=1.105120 time: 14.768003
INFO:root:[Epoch 88] train=0.135544 val=0.861600 loss=1.154698 time: 14.728120
INFO:root:[Epoch 89] train=0.132601 val=0.842600 loss=1.129514 time: 14.941008
INFO:root:[Epoch 90] train=0.132692 val=0.838200 loss=1.124923 time: 14.591231
INFO:root:[Epoch 91] train=0.134730 val=0.860300 loss=1.140476 time: 15.781524
INFO:root:[Epoch 92] train=0.133655 val=0.813600 loss=1.135255 time: 15.374075
INFO:root:[Epoch 93] train=0.132743 val=0.861700 loss=1.129478 time: 14.915272
INFO:root:[Epoch 94] train=0.133717 val=0.845100 loss=1.124674 time: 14.693224
INFO:root:[Epoch 95] train=0.133123 val=0.852400 loss=1.118330 time: 14.992239
INFO:root:[Epoch 96] train=0.130834 val=0.868100 loss=1.079351 time: 14.625501
INFO:root:[Epoch 97] train=0.132884 val=0.839300 loss=1.101914 time: 15.921066
INFO:root:[Epoch 98] train=0.134193 val=0.827600 loss=1.132491 time: 15.392793
INFO:root:[Epoch 99] train=0.131627 val=0.861700 loss=1.090046 time: 14.426976
INFO:root:[Epoch 100] train=0.124400 val=0.899700 loss=1.087879 time: 14.963214
INFO:root:[Epoch 101] train=0.121312 val=0.906000 loss=1.050782 time: 15.036295
INFO:root:[Epoch 102] train=0.116693 val=0.913300 loss=1.001507 time: 14.870364
INFO:root:[Epoch 103] train=0.118379 val=0.910200 loss=1.035791 time: 14.768860
INFO:root:[Epoch 104] train=0.116510 val=0.907900 loss=1.001752 time: 14.625885
INFO:root:[Epoch 105] train=0.118256 val=0.906300 loss=1.047746 time: 14.700061
INFO:root:[Epoch 106] train=0.119114 val=0.912800 loss=1.050644 time: 14.864033
INFO:root:[Epoch 107] train=0.117000 val=0.910300 loss=1.020595 time: 14.994747
INFO:root:[Epoch 108] train=0.118113 val=0.908600 loss=1.040286 time: 14.796971
INFO:root:[Epoch 109] train=0.115249 val=0.909300 loss=0.998814 time: 14.774264
INFO:root:[Epoch 110] train=0.116549 val=0.913900 loss=1.020184 time: 14.829585
INFO:root:[Epoch 111] train=0.116074 val=0.912500 loss=1.013137 time: 14.624498
INFO:root:[Epoch 112] train=0.116976 val=0.914600 loss=1.048082 time: 14.795966
INFO:root:[Epoch 113] train=0.115627 val=0.912100 loss=1.032480 time: 14.745152
INFO:root:[Epoch 114] train=0.114935 val=0.911900 loss=1.013229 time: 14.867087
INFO:root:[Epoch 115] train=0.118823 val=0.912400 loss=1.062790 time: 15.112724
INFO:root:[Epoch 116] train=0.115569 val=0.908700 loss=1.018441 time: 14.836329
INFO:root:[Epoch 117] train=0.114280 val=0.912800 loss=1.012307 time: 14.740478
INFO:root:[Epoch 118] train=0.115260 val=0.917400 loss=1.016864 time: 14.837527
INFO:root:[Epoch 119] train=0.111966 val=0.913200 loss=0.969644 time: 14.646590
INFO:root:[Epoch 120] train=0.115737 val=0.912700 loss=1.019397 time: 14.679328
INFO:root:[Epoch 121] train=0.113590 val=0.912800 loss=0.999965 time: 14.607461
INFO:root:[Epoch 122] train=0.116221 val=0.912300 loss=1.034661 time: 14.906367
INFO:root:[Epoch 123] train=0.112747 val=0.912700 loss=1.004540 time: 15.225174
INFO:root:[Epoch 124] train=0.111693 val=0.916900 loss=0.972196 time: 15.001822
INFO:root:[Epoch 125] train=0.114209 val=0.912200 loss=1.004237 time: 15.513529
INFO:root:[Epoch 126] train=0.112321 val=0.914200 loss=0.990863 time: 15.458532
INFO:root:[Epoch 127] train=0.112958 val=0.917000 loss=1.000495 time: 15.094835
INFO:root:[Epoch 128] train=0.114132 val=0.912700 loss=1.028111 time: 15.375230
INFO:root:[Epoch 129] train=0.113772 val=0.915600 loss=1.003963 time: 15.153053
INFO:root:[Epoch 130] train=0.111478 val=0.920100 loss=0.969821 time: 14.850194
INFO:root:[Epoch 131] train=0.112774 val=0.918000 loss=0.998570 time: 14.770657
INFO:root:[Epoch 132] train=0.114031 val=0.913900 loss=1.012055 time: 15.650185
INFO:root:[Epoch 133] train=0.113335 val=0.911800 loss=1.003452 time: 14.843538
INFO:root:[Epoch 134] train=0.113271 val=0.918400 loss=1.018314 time: 15.220881
INFO:root:[Epoch 135] train=0.115617 val=0.909600 loss=1.030986 time: 14.946962
INFO:root:[Epoch 136] train=0.111127 val=0.914100 loss=0.977114 time: 15.078085
INFO:root:[Epoch 137] train=0.113967 val=0.914500 loss=1.002509 time: 14.977228
INFO:root:[Epoch 138] train=0.110831 val=0.914300 loss=0.970495 time: 15.537740
INFO:root:[Epoch 139] train=0.113159 val=0.911300 loss=1.007186 time: 15.572331
INFO:root:[Epoch 140] train=0.111242 val=0.911600 loss=0.981227 time: 15.436458
INFO:root:[Epoch 141] train=0.112771 val=0.915100 loss=0.990389 time: 14.930776
INFO:root:[Epoch 142] train=0.113091 val=0.917600 loss=1.006408 time: 14.864911
INFO:root:[Epoch 143] train=0.113549 val=0.914400 loss=1.023123 time: 15.190433
INFO:root:[Epoch 144] train=0.111686 val=0.920100 loss=0.985300 time: 14.719650
INFO:root:[Epoch 145] train=0.113851 val=0.916200 loss=1.011722 time: 14.674854
INFO:root:[Epoch 146] train=0.114468 val=0.912100 loss=1.029200 time: 14.583114
INFO:root:[Epoch 147] train=0.111567 val=0.913700 loss=0.980381 time: 15.057461
INFO:root:[Epoch 148] train=0.111785 val=0.916800 loss=1.002828 time: 14.880398
INFO:root:[Epoch 149] train=0.112688 val=0.912300 loss=1.006460 time: 14.973571
INFO:root:[Epoch 150] train=0.110636 val=0.921600 loss=0.997361 time: 14.812001
INFO:root:[Epoch 151] train=0.109293 val=0.919400 loss=0.982740 time: 14.970062
INFO:root:[Epoch 152] train=0.109422 val=0.920200 loss=0.976840 time: 14.715374
INFO:root:[Epoch 153] train=0.110920 val=0.920300 loss=1.001172 time: 15.669143
INFO:root:[Epoch 154] train=0.109513 val=0.918600 loss=0.982857 time: 14.892995
INFO:root:[Epoch 155] train=0.107330 val=0.919700 loss=0.955348 time: 14.849082
INFO:root:[Epoch 156] train=0.106979 val=0.920500 loss=0.952286 time: 14.806717
INFO:root:[Epoch 157] train=0.109182 val=0.920600 loss=0.980665 time: 14.914863
INFO:root:[Epoch 158] train=0.107235 val=0.920700 loss=0.957478 time: 14.707672
INFO:root:[Epoch 159] train=0.109001 val=0.919200 loss=0.981061 time: 15.615470
INFO:root:[Epoch 160] train=0.108705 val=0.922600 loss=0.974033 time: 15.628472
INFO:root:[Epoch 161] train=0.107598 val=0.921000 loss=0.961567 time: 15.120503
INFO:root:[Epoch 162] train=0.109596 val=0.918200 loss=0.984349 time: 15.815814
INFO:root:[Epoch 163] train=0.105325 val=0.920500 loss=0.942561 time: 14.738740
INFO:root:[Epoch 164] train=0.109156 val=0.922700 loss=0.981648 time: 14.838383
INFO:root:[Epoch 165] train=0.108858 val=0.921300 loss=0.993409 time: 14.925972
INFO:root:[Epoch 166] train=0.108088 val=0.919700 loss=0.975608 time: 14.814179
INFO:root:[Epoch 167] train=0.108027 val=0.918400 loss=0.979368 time: 14.880730
INFO:root:[Epoch 168] train=0.109992 val=0.917700 loss=0.997064 time: 15.813431
INFO:root:[Epoch 169] train=0.108325 val=0.918800 loss=0.986614 time: 14.730947
INFO:root:[Epoch 170] train=0.107611 val=0.920300 loss=0.970958 time: 14.778224
INFO:root:[Epoch 171] train=0.109676 val=0.920300 loss=0.986105 time: 14.944006
INFO:root:[Epoch 172] train=0.106784 val=0.921000 loss=0.947557 time: 14.722294
INFO:root:[Epoch 173] train=0.108103 val=0.922700 loss=0.978938 time: 15.106142
INFO:root:[Epoch 174] train=0.107958 val=0.920400 loss=0.963115 time: 15.468962
INFO:root:[Epoch 175] train=0.109726 val=0.917600 loss=0.986627 time: 15.714921
INFO:root:[Epoch 176] train=0.109207 val=0.919300 loss=0.986805 time: 15.356779
INFO:root:[Epoch 177] train=0.107296 val=0.920700 loss=0.964861 time: 14.765868
INFO:root:[Epoch 178] train=0.108846 val=0.919100 loss=0.982581 time: 15.553216
INFO:root:[Epoch 179] train=0.108200 val=0.923500 loss=0.973444 time: 15.095054
INFO:root:[Epoch 180] train=0.109918 val=0.919500 loss=0.988498 time: 15.991238
INFO:root:[Epoch 181] train=0.108095 val=0.917600 loss=0.975555 time: 15.697598
INFO:root:[Epoch 182] train=0.107851 val=0.919800 loss=0.961349 time: 15.699680
INFO:root:[Epoch 183] train=0.109229 val=0.918600 loss=0.995267 time: 15.504840
INFO:root:[Epoch 184] train=0.105564 val=0.921700 loss=0.944285 time: 14.714736
INFO:root:[Epoch 185] train=0.108294 val=0.923200 loss=0.964006 time: 15.920093
INFO:root:[Epoch 186] train=0.106905 val=0.920100 loss=0.955520 time: 15.085177
INFO:root:[Epoch 187] train=0.107875 val=0.920700 loss=0.970213 time: 14.955984
INFO:root:[Epoch 188] train=0.105809 val=0.918200 loss=0.946067 time: 15.107093
INFO:root:[Epoch 189] train=0.108244 val=0.920700 loss=0.987433 time: 15.803307
INFO:root:[Epoch 190] train=0.106959 val=0.918600 loss=0.953937 time: 15.103914
INFO:root:[Epoch 191] train=0.109304 val=0.919900 loss=0.989090 time: 15.325271
INFO:root:[Epoch 192] train=0.108481 val=0.920400 loss=0.990325 time: 15.015365
INFO:root:[Epoch 193] train=0.106192 val=0.919300 loss=0.949797 time: 14.864878
INFO:root:[Epoch 194] train=0.106197 val=0.919900 loss=0.945188 time: 14.865268
INFO:root:[Epoch 195] train=0.106942 val=0.922600 loss=0.965709 time: 14.948938
INFO:root:[Epoch 196] train=0.108803 val=0.919800 loss=0.978186 time: 14.834077
INFO:root:[Epoch 197] train=0.108818 val=0.919800 loss=0.990592 time: 14.687156
INFO:root:[Epoch 198] train=0.108320 val=0.920100 loss=0.974416 time: 14.963351
INFO:root:[Epoch 199] train=0.108861 val=0.917300 loss=0.996708 time: 14.860872
INFO:root:[Epoch 200] train=0.070434 val=0.927300 loss=0.122021 time: 15.003160
INFO:root:[Epoch 201] train=0.068877 val=0.926200 loss=0.110607 time: 15.138837
INFO:root:[Epoch 202] train=0.067746 val=0.926700 loss=0.105251 time: 15.004166
INFO:root:[Epoch 203] train=0.067333 val=0.928200 loss=0.102212 time: 15.423245
INFO:root:[Epoch 204] train=0.066254 val=0.926300 loss=0.099062 time: 14.783807
INFO:root:[Epoch 205] train=0.066378 val=0.929000 loss=0.098349 time: 15.281185
INFO:root:[Epoch 206] train=0.064600 val=0.927500 loss=0.093941 time: 14.934905
INFO:root:[Epoch 207] train=0.064767 val=0.927100 loss=0.093336 time: 14.924889
INFO:root:[Epoch 208] train=0.063752 val=0.926300 loss=0.090829 time: 14.914001
INFO:root:[Epoch 209] train=0.063574 val=0.926000 loss=0.089642 time: 14.819296
INFO:root:[Epoch 210] train=0.062782 val=0.927400 loss=0.088220 time: 14.941385
INFO:root:[Epoch 211] train=0.062607 val=0.926500 loss=0.086744 time: 14.837992
INFO:root:[Epoch 212] train=0.062877 val=0.926900 loss=0.087322 time: 15.009884
INFO:root:[Epoch 213] train=0.060959 val=0.927100 loss=0.082820 time: 14.941187
INFO:root:[Epoch 214] train=0.060817 val=0.927200 loss=0.081749 time: 15.125451
INFO:root:[Epoch 215] train=0.060527 val=0.927200 loss=0.080864 time: 15.186069
INFO:root:[Epoch 216] train=0.061007 val=0.927500 loss=0.081336 time: 15.906200
INFO:root:[Epoch 217] train=0.059995 val=0.925600 loss=0.079198 time: 15.792664
INFO:root:[Epoch 218] train=0.059356 val=0.925300 loss=0.077666 time: 15.271790
INFO:root:[Epoch 219] train=0.058320 val=0.926100 loss=0.075233 time: 14.820616
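For context, the run above trains cifar_resnet20_v1 with mixup data augmentation; the reported loss stays near 1.0 while the soft mixed targets are in use, and the sharp drop at epoch 200 would be expected if the script follows the common recipe of switching mixup off for the final 20 epochs. Below is a minimal NumPy sketch of what mixup does to each batch; it is illustrative only, and the name mixup_batch and the alpha parameter are assumptions, not taken from train_mixup_cifar10.py.

import numpy as np

def mixup_batch(x, y, alpha=1.0, num_classes=10):
    # Illustrative mixup sketch (not the GluonCV implementation):
    # draw a mixing coefficient from Beta(alpha, alpha), pick a random
    # partner example for each sample, and blend both inputs and targets.
    lam = np.random.beta(alpha, alpha)           # mixing coefficient in [0, 1]
    idx = np.random.permutation(x.shape[0])      # random partner for each example
    y_onehot = np.eye(num_classes)[y]            # integer labels -> one-hot targets
    x_mix = lam * x + (1.0 - lam) * x[idx]       # convex combination of image pairs
    y_mix = lam * y_onehot + (1.0 - lam) * y_onehot[idx]  # soft mixed targets
    return x_mix, y_mix

Training then minimizes cross-entropy against the soft targets y_mix instead of the original hard labels.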