# This is the pre-tuned parameters for cuda backend # TVM downloaded this during compilation {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [192, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [192, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8797896, "c": null, "e": [["tile_f", "sp", [6, 1, 32, 1]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [64, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[5.69732536806342e-05], 0, 14.116215467453003, 1535423916.7184713], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [448, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [448, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 734204, "c": null, "e": [["tile_f", "sp", [28, 2, 8, 1]], ["tile_y", "sp", [2, 2, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [64, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[9.882545781556573e-05], 0, 20.854647159576416, 1535425623.6315196], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [384, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [384, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1240970, "c": null, "e": [["tile_f", "sp", [12, 1, 16, 2]], ["tile_y", "sp", [2, 2, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [32, 64]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[8.448735867117118e-05], 0, 14.069890975952148, 1535426980.678347], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [320, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [320, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5709779, "c": null, "e": [["tile_f", "sp", [10, 1, 16, 2]], ["tile_y", "sp", [2, 1, 2, 2]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [32, 64]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.139676807712908e-05], 0, 3.966081380844116, 1535427983.421136], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [192, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [192, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 13233096, "c": null, "e": [["tile_f", "sp", [6, 1, 32, 1]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [40, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.6700240116420085e-05], 0, 3.608333110809326, 1535429000.509115], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 448, 8, 8], "float32"], ["TENSOR", [384, 448, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 8, 8, "float32"], [384, 448, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 232980, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [6, 1, 32, 2]], ["tile_x", "sp", [1, 1, 4, 1]], ["tile_rc", "sp", [28, 16]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00011755589655172413], 0, 5.878081798553467, 1535431279.4730268], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [448, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [448, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3731075, "c": null, "e": [["tile_f", "sp", [14, 1, 16, 2]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [32, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.2090259632072194e-05], 0, 1.3185603618621826, 1535433032.3829937], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 384, 8, 8], "float32"], ["TENSOR", [384, 384, 3, 1], "float32"], [1, 1], [1, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 8, 8, "float32"], [384, 384, 3, 1, "float32"], [1, 1], [1, 0], [1, 1], "NCHW", "float32"], {"i": 35416378, "c": null, "e": [["tile_f", "sp", [24, 2, 8, 1]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [16, 24]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.5191243902439025e-05], 0, 1.303452730178833, 1535434456.1185555], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 384, 8, 8], "float32"], ["TENSOR", [384, 384, 1, 3], "float32"], [1, 1], [0, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 8, 8, "float32"], [384, 384, 1, 3, "float32"], [1, 1], [0, 1], [1, 1], "NCHW", "float32"], {"i": 13970090, "c": null, "e": [["tile_f", "sp", [12, 1, 16, 2]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [16, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.3322369696969696e-05], 0, 1.4410252571105957, 1535436450.3439708], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [384, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [384, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5330090, "c": null, "e": [["tile_f", "sp", [12, 1, 16, 2]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [32, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.552532625833839e-05], 0, 1.3521144390106201, 1535437568.5237384], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [320, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [320, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3690668, "c": null, "e": [["tile_f", "sp", [20, 2, 8, 1]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [32, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.795118104265403e-05], 0, 1.2909648418426514, 1535438405.1098573], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 10006142, "c": null, "e": [["tile_f", "sp", [12, 1, 16, 1]], ["tile_y", "sp", [8, 1, 1, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [32, 6]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.094121392190153e-05], 0, 1.2878923416137695, 1535439480.1925032], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [320, 192, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [320, 192, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 28729475, "c": null, "e": [["tile_f", "sp", [10, 1, 16, 2]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [48, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[6.494733763996555e-05], 0, 1.3043124675750732, 1535440944.6158926], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 776877, "c": null, "e": [["tile_f", "sp", [8, 4, 6, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [32, 6]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00016294033007600435], 0, 2.581367254257202, 1535441832.1955879], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 325452, "c": null, "e": [["tile_f", "sp", [8, 1, 6, 4]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [32, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00016267638502673795], 0, 1.3517541885375977, 1535442556.6070833], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [192, 160, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [192, 160, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 88756, "c": null, "e": [["tile_f", "sp", [6, 4, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 7]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00014167337005649717], 0, 2.514183282852173, 1535443548.5128298], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [160, 160, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [160, 160, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 449924, "c": null, "e": [["tile_f", "sp", [4, 1, 10, 4]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 10]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00011452480775075988], 0, 4.953140497207642, 1535444579.1761086], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [192, 160, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [192, 160, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 669438, "c": null, "e": [["tile_f", "sp", [6, 2, 8, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00014245359474671668], 0, 3.571627140045166, 1535445303.0586505], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [160, 160, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [160, 160, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 446246, "c": null, "e": [["tile_f", "sp", [5, 4, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00011218677481481482], 0, 1.3602855205535889, 1535446031.3722363], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [160, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [160, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 220455, "c": null, "e": [["tile_f", "sp", [4, 5, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [48, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.427650495049505e-05], 0, 1.2197649478912354, 1535446918.1282773], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [192, 128, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [192, 128, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 61876, "c": null, "e": [["tile_f", "sp", [6, 4, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 7]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00011426011119573495], 0, 18.739489793777466, 1535447697.4223692], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [128, 128, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [128, 128, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 129655, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.613525476429288e-05], 0, 8.062903881072998, 1535448397.9567559], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [192, 128, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [192, 128, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 448948, "c": null, "e": [["tile_f", "sp", [6, 4, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00011292374210526315], 0, 2.1787657737731934, 1535449108.6007366], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [128, 128, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [128, 128, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 129623, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.355220323212537e-05], 0, 3.0357697010040283, 1535449805.546825], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 120055, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [32, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[6.593256546572935e-05], 0, 10.24105978012085, 1535450762.5778947], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [192, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [192, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 228674, "c": null, "e": [["tile_f", "sp", [8, 2, 3, 4]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [64, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[8.298381360619469e-05], 0, 1.2305030822753906, 1535452260.8384964], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 96, 35, 35], "float32"], ["TENSOR", [96, 96, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 35, 35, "float32"], [96, 96, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 227677, "c": null, "e": [["tile_f", "sp", [4, 2, 6, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [24, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.262685946622186e-05], 0, 2.3495943546295166, 1535453153.0011835], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [384, 288, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [384, 288, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 426376, "c": null, "e": [["tile_f", "sp", [16, 4, 3, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [144, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00046931365937500003], 0, 5.672942876815796, 1535454505.8647752], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [48, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [48, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2956827, "c": null, "e": [["tile_f", "sp", [2, 6, 4, 1]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [12, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.304847466960352e-05], 0, 9.784206628799438, 1535455827.5935478], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [64, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [64, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 225833, "c": null, "e": [["tile_f", "sp", [2, 4, 4, 2]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [12, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.915220448617632e-05], 0, 1.400207281112671, 1535456732.6478782], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 35, 35], "float32"], ["TENSOR", [48, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 35, 35, "float32"], [48, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1129045, "c": null, "e": [["tile_f", "sp", [2, 2, 4, 3]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.0527908595641644e-05], 0, 2.0959362983703613, 1535457623.5808153], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 35, 35], "float32"], ["TENSOR", [64, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 35, 35, "float32"], [64, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 462356, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 1]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.608307039884671e-05], 0, 7.058563470840454, 1535458449.1994894], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [32, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [32, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 322573, "c": null, "e": [["tile_f", "sp", [2, 4, 4, 1]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [8, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.7956799473873014e-05], 0, 1.3385558128356934, 1535459233.2346888], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 96, 35, 35], "float32"], ["TENSOR", [96, 96, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 35, 35, "float32"], [96, 96, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1352202, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [6, 2, 2, 4]], ["tile_x", "sp", [1, 3, 108, 1]], ["tile_rc", "sp", [12, 8]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 0]], "t": "winograd"}], "r": [[5.601382798507463e-05], 0, 11.555405378341675, 1535461246.940517], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 35, 35], "float32"], ["TENSOR", [96, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 35, 35, "float32"], [96, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1901044, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [6, 1, 2, 8]], ["tile_x", "sp", [1, 3, 108, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[4.3254536418166237e-05], 0, 1.3233392238616943, 1535463738.0164495], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 48, 35, 35], "float32"], ["TENSOR", [64, 48, 5, 5], "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 48, 35, 35, "float32"], [64, 48, 5, 5, "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {"i": 3275017, "c": null, "e": [["tile_f", "sp", [2, 1, 32, 1]], ["tile_y", "sp", [5, 1, 1, 7]], ["tile_x", "sp", [7, 1, 5, 1]], ["tile_rc", "sp", [16, 3]], ["tile_ry", "sp", [1, 5]], ["tile_rx", "sp", [1, 5]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.947820815677967e-05], 0, 1.3620657920837402, 1535466035.195244], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [48, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [48, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1774165, "c": null, "e": [["tile_f", "sp", [2, 2, 4, 3]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [12, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.3459930018703244e-05], 0, 4.270485162734985, 1535466912.1237135], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [64, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [64, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1085972, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 1]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [8, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.796061236268851e-05], 0, 1.4543447494506836, 1535467856.116219], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 80, 73, 73], "float32"], ["TENSOR", [192, 80, 3, 3], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 80, 73, 73, "float32"], [192, 80, 3, 3, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 922093, "c": null, "e": [["tile_f", "sp", [4, 8, 3, 2]], ["tile_y", "sp", [71, 1, 1, 1]], ["tile_x", "sp", [1, 1, 71, 1]], ["tile_rc", "sp", [40, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0006553213608695652], 0, 1.5329456329345703, 1535470367.360386], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 73, 73], "float32"], ["TENSOR", [80, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 73, 73, "float32"], [80, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 70574, "c": null, "e": [["tile_f", "sp", [5, 8, 2, 1]], ["tile_y", "sp", [73, 1, 1, 1]], ["tile_x", "sp", [1, 1, 73, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.883320232858991e-05], 0, 1.5573954582214355, 1535471271.1996536], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 32, 147, 147], "float32"], ["TENSOR", [64, 32, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 147, 147, "float32"], [64, 32, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 8943001, "c": null, "e": [["tile_f", "sp", [2, 1, 32, 1]], ["tile_y", "sp", [21, 1, 1, 7]], ["tile_x", "sp", [7, 1, 7, 3]], ["tile_rc", "sp", [32, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00025052284166666663], 0, 7.156659126281738, 1535472591.5201485], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 32, 149, 149], "float32"], ["TENSOR", [32, 32, 3, 3], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 149, 149, "float32"], [32, 32, 3, 3, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6050372, "c": null, "e": [["tile_f", "sp", [1, 1, 32, 1]], ["tile_y", "sp", [21, 7, 1, 1]], ["tile_x", "sp", [7, 1, 7, 3]], ["tile_rc", "sp", [16, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00012198324857839155], 0, 3.08567214012146, 1535473653.6231658], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 3, 299, 299], "float32"], ["TENSOR", [32, 3, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 299, 299, "float32"], [32, 3, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 23791, "c": null, "e": [["tile_f", "sp", [2, 2, 1, 8]], ["tile_y", "sp", [149, 1, 1, 1]], ["tile_x", "sp", [1, 1, 149, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.216306946826758e-05], 0, 5.255294322967529, 1535474702.044478], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 117516, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [8, 4, 16, 1]], ["tile_x", "sp", [1, 7, 7, 1]], ["tile_rc", "sp", [64, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00018763357], 0, 1.6003353595733643, 1535405501.9874778], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1172502, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [8, 2, 8, 4]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [64, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0005314307929824561], 0, 15.780593156814575, 1535406995.4651527], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1062454, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [16, 4, 4, 2]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [32, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0002953229862475442], 0, 1.9394707679748535, 1535408976.5928106], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 796885, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [4, 2, 8, 4]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [32, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00037394815158924206], 0, 3.6207523345947266, 1535410469.9387088], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 714385, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [4, 2, 8, 4]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00023493554432348364], 0, 4.635826826095581, 1535411899.2398431], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1813376, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 4, 8, 2]], ["tile_x", "sp", [7, 7, 16, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00042691407670454546], 0, 19.30599021911621, 1535414354.3659015], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1603376, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 4, 8, 2]], ["tile_x", "sp", [7, 7, 16, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0002890588754789272], 0, 3.4835450649261475, 1535415894.133812], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 2195318, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [1, 2, 8, 4]], ["tile_x", "sp", [49, 4, 16, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0006257575269709544], 0, 5.687942981719971, 1535418141.8687968], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 59919263, "c": null, "e": [["tile_f", "sp", [1, 2, 16, 2]], ["tile_y", "sp", [56, 1, 1, 4]], ["tile_x", "sp", [7, 2, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.754253240324032e-05], 0, 1.5378711223602295, 1535422580.7674332], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 2048, 7, 7], "float32"], ["TENSOR", [512, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 7, 7, "float32"], [512, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 58555, "c": null, "e": [["tile_f", "sp", [16, 2, 16, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [128, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00015904646927966104], 0, 1.3764936923980713, 1535340979.3568566], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [2048, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [2048, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 143095, "c": null, "e": [["tile_f", "sp", [64, 2, 16, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[7.65306638061585e-05], 0, 2.2808291912078857, 1535342441.0873826], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [2048, 1024, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [2048, 1024, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 335004, "c": null, "e": [["tile_f", "sp", [16, 2, 32, 2]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [256, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00028060819402985073], 0, 1.6567697525024414, 1535343548.7764354], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [512, 1024, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [512, 1024, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 132040, "c": null, "e": [["tile_f", "sp", [16, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.38442889305816e-05], 0, 1.886913776397705, 1535346388.4123077], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [256, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [256, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2521885, "c": null, "e": [["tile_f", "sp", [16, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [64, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.780090051813472e-05], 0, 2.096433401107788, 1535347370.988212], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [1024, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [1024, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2965852, "c": null, "e": [["tile_f", "sp", [32, 4, 8, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.840749328644501e-05], 0, 34.52979779243469, 1535349067.3907256], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [1024, 512, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [1024, 512, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1723190, "c": null, "e": [["tile_f", "sp", [16, 4, 16, 1]], ["tile_y", "sp", [7, 1, 1, 2]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00011870797707509881], 0, 6.502109050750732, 1535351117.3144069], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [256, 512, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [256, 512, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1795299, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 4]], ["tile_y", "sp", [14, 1, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [128, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[5.030473995983936e-05], 0, 14.208871603012085, 1535355244.8065631], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6652823, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 1, 14, 2]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[5.824087160589604e-05], 0, 8.829635620117188, 1535357359.3047445], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [512, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [512, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4022069, "c": null, "e": [["tile_f", "sp", [16, 4, 8, 1]], ["tile_y", "sp", [7, 4, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.836637116564418e-05], 0, 30.88849425315857, 1535359778.4165032], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 13520886, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 4]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.964673237753884e-05], 0, 21.609076023101807, 1535362579.700074], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [128, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7564855, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.5890965484408476e-05], 0, 7.409880638122559, 1535366580.0144885], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [64, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [64, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6625940, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 1]], ["tile_y", "sp", [56, 1, 1, 1]], ["tile_x", "sp", [1, 4, 14, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.789036865624003e-05], 0, 1.2796452045440674, 1535367873.0130634], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [256, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [256, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3542617, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 2]], ["tile_y", "sp", [28, 1, 1, 2]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.3173960469779435e-05], 0, 19.429253578186035, 1535370537.716797], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 222539, "c": null, "e": [["tile_f", "sp", [64, 2, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00012062812861736334], 0, 1.9216992855072021, 1535376563.6045246], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 25761, "c": null, "e": [["tile_f", "sp", [256, 1, 4, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.3759017449004675e-06], 0, 4.439143419265747, 1535377557.3893585], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [1024, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [1024, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 108194, "c": null, "e": [["tile_f", "sp", [64, 2, 4, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.706907663125948e-05], 0, 1.3412449359893799, 1535379150.708214], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 19810, "c": null, "e": [["tile_f", "sp", [256, 1, 2, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.498452183395762e-06], 0, 1.8959953784942627, 1535380034.9811802], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 771840, "c": null, "e": [["tile_f", "sp", [16, 2, 8, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.098708732737612e-05], 0, 2.6358609199523926, 1535381939.087032], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 310860, "c": null, "e": [["tile_f", "sp", [512, 1, 1, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.404539118416505e-06], 0, 1.145503282546997, 1535383090.2789602], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 715461, "c": null, "e": [["tile_f", "sp", [32, 4, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.3200288139329807e-05], 0, 5.702603101730347, 1535384171.1888306], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 191400, "c": null, "e": [["tile_f", "sp", [256, 1, 1, 1]], ["tile_y", "sp", [1, 1, 14, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.514348936170213e-06], 0, 1.3499267101287842, 1535385287.8302827], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [256, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3339832, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 2]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 14, 2]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.51544672033393e-05], 0, 1.2489631175994873, 1535386621.2483017], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1172160, "c": null, "e": [["tile_f", "sp", [256, 1, 1, 1]], ["tile_y", "sp", [2, 1, 7, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.0338754319654428e-05], 0, 5.082122564315796, 1535387899.0255547], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5190596, "c": null, "e": [["tile_f", "sp", [8, 4, 8, 1]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 14, 2]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.6632782280701753e-05], 0, 3.803677797317505, 1535390486.2065094], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 894840, "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [1, 1, 28, 1]], ["tile_x", "sp", [1, 1, 14, 2]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.2479111885844008e-05], 0, 8.535689115524292, 1535391579.1142876], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 4126080, "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [2, 1, 4, 7]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.559869505094614e-05], 0, 3.6279289722442627, 1535395987.3606555], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7948975, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.3504540112817302e-05], 0, 1.3222031593322754, 1535397725.9387324], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 112, 112, "float32"], [64, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 2886240, "c": null, "e": [["tile_f", "sp", [64, 1, 1, 1]], ["tile_y", "sp", [7, 1, 4, 2]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.393993827751196e-05], 0, 1.3483922481536865, 1535398924.3910995], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [64, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 112, 112, "float32"], [64, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 23485665, "c": null, "e": [["tile_f", "sp", [1, 8, 8, 1]], ["tile_y", "sp", [28, 2, 2, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [8, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.6321584531743247e-05], 0, 1.2850267887115479, 1535400525.906705], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [32, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 32, 112, 112, "float32"], [32, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 4740848, "c": null, "e": [["tile_f", "sp", [32, 1, 1, 1]], ["tile_y", "sp", [8, 1, 2, 7]], ["tile_x", "sp", [1, 1, 112, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.5317467108618054e-05], 0, 1.272824764251709, 1535401884.0289147], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [32, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [32, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 41999093, "c": null, "e": [["tile_f", "sp", [1, 1, 8, 4]], ["tile_y", "sp", [16, 7, 1, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.3977960968793665e-05], 0, 1.3667681217193604, 1535403596.0402923], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 344206, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [8, 1, 16, 4]], ["tile_x", "sp", [1, 1, 8, 2]], ["tile_rc", "sp", [32, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[8.610198970251716e-05], 0, 1.4480016231536865, 1535511176.105762], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 110915, "c": null, "e": [["tile_f", "sp", [16, 2, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.729347361717613e-05], 0, 1.236013412475586, 1535512971.3886647], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 736155, "c": null, "e": [["tile_f", "sp", [16, 2, 16, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [7, 1, 1, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00016977524406779662], 0, 2.0983633995056152, 1535514397.926689], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 66732, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [4, 2, 16, 2]], ["tile_x", "sp", [1, 7, 7, 1]], ["tile_rc", "sp", [16, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[7.205142459173871e-05], 0, 1.4618885517120361, 1535515568.3814023], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 528026, "c": null, "e": [["tile_f", "sp", [8, 4, 8, 1]], ["tile_y", "sp", [14, 1, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [8, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.621085209760274e-05], 0, 10.320522785186768, 1535516689.8571618], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 5183051, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [32, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.29051653250774e-05], 0, 1.2669603824615479, 1535518191.0489676], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 520156, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [4, 2, 4, 4]], ["tile_x", "sp", [1, 2, 98, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[6.1538798195242e-05], 0, 2.2867848873138428, 1535519772.1041968], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1996855, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.2646983313669309e-05], 0, 1.2524137496948242, 1535521080.8447835], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 31178187, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [2, 1, 14, 1]], ["tile_rc", "sp", [32, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[6.736485554060117e-05], 0, 1.3498356342315674, 1535522336.8640018], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 13628176, "c": null, "e": [["tile_f", "sp", [2, 8, 4, 1]], ["tile_y", "sp", [56, 1, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [4, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.5144373877056628e-05], 0, 4.291016101837158, 1535524033.4773188], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 263327, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 1, 4, 8]], ["tile_x", "sp", [1, 2, 98, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[6.067211160533765e-05], 0, 2.9739716053009033, 1535525609.763939], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 992, 7, 7], "float32"], ["TENSOR", [128, 992, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 992, 7, 7, "float32"], [128, 992, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 100826, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [62, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.591533125897558e-05], 0, 1.1918675899505615, 1535541649.1359475], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 960, 7, 7], "float32"], ["TENSOR", [128, 960, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 960, 7, 7, "float32"], [128, 960, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 73946, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [60, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.460836556169429e-05], 0, 18.39060950279236, 1535542453.8870785], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 928, 7, 7], "float32"], ["TENSOR", [128, 928, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 928, 7, 7, "float32"], [128, 928, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 77786, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [58, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.402473731477324e-05], 0, 1.248830795288086, 1535543187.1269765], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 896, 7, 7], "float32"], ["TENSOR", [128, 896, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 896, 7, 7, "float32"], [128, 896, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 102746, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 14]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.9833012105576504e-05], 0, 1.2429931163787842, 1535543904.137366], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 864, 7, 7], "float32"], ["TENSOR", [128, 864, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 864, 7, 7, "float32"], [128, 864, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 200666, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [54, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.1719856631578944e-05], 0, 10.93128776550293, 1535544717.37846], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 832, 7, 7], "float32"], ["TENSOR", [128, 832, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 832, 7, 7, "float32"], [128, 832, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 10586, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [52, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.109051445683602e-05], 0, 1.3161144256591797, 1535545449.2476206], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 800, 7, 7], "float32"], ["TENSOR", [128, 800, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 800, 7, 7, "float32"], [128, 800, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 16346, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 25]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.168217405126168e-05], 0, 2.2161407470703125, 1535546141.5243394], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 768, 7, 7], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 7, 7, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 14426, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [48, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.887415226179018e-05], 0, 1.545259714126587, 1535546886.594689], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 736, 7, 7], "float32"], ["TENSOR", [128, 736, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 736, 7, 7, "float32"], [128, 736, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 10581, "c": null, "e": [["tile_f", "sp", [16, 1, 8, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 23]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.998375162548765e-05], 0, 3.117043972015381, 1535547588.8117917], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 704, 7, 7], "float32"], ["TENSOR", [128, 704, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 704, 7, 7, "float32"], [128, 704, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 64581, "c": null, "e": [["tile_f", "sp", [16, 1, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [44, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.866922851002865e-05], 0, 4.251639366149902, 1535548336.165861], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 672, 7, 7], "float32"], ["TENSOR", [128, 672, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 672, 7, 7, "float32"], [128, 672, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 123861, "c": null, "e": [["tile_f", "sp", [16, 1, 8, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [12, 56]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.3642525614914618e-05], 0, 14.377593994140625, 1535549085.618711], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 640, 7, 7], "float32"], ["TENSOR", [128, 640, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 640, 7, 7, "float32"], [128, 640, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 12506, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [40, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.506711887295765e-05], 0, 2.169248342514038, 1535549800.977766], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 608, 7, 7], "float32"], ["TENSOR", [128, 608, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 608, 7, 7, "float32"], [128, 608, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 54981, "c": null, "e": [["tile_f", "sp", [16, 1, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [38, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.5828701796599296e-05], 0, 1.7529754638671875, 1535550758.379618], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 576, 7, 7], "float32"], ["TENSOR", [128, 576, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 576, 7, 7, "float32"], [128, 576, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 135386, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [48, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.3415936738157693e-05], 0, 3.331063747406006, 1535551509.8037014], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 544, 7, 7], "float32"], ["TENSOR", [128, 544, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 544, 7, 7, "float32"], [128, 544, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 54981, "c": null, "e": [["tile_f", "sp", [16, 1, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [34, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.3128844173042964e-05], 0, 13.201400756835938, 1535552201.2710478], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 7, 7], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 7, 7, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 77269, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [1, 1, 8, 4]], ["tile_x", "sp", [1, 1, 16, 1]], ["tile_rc", "sp", [1, 128]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[1.2395253346554289e-05], 0, 1.358180284500122, 1535553731.9634063], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 85466, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.104165770137252e-05], 0, 1.2656316757202148, 1535554370.9764986], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [512, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [512, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2686720, "c": null, "e": [["tile_f", "sp", [16, 2, 8, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [128, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00011719296046511628], 0, 6.276942491531372, 1535555350.7657874], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 992, 14, 14], "float32"], ["TENSOR", [128, 992, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 992, 14, 14, "float32"], [128, 992, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 507382, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [62, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.409438910926794e-05], 0, 12.190387964248657, 1535556229.9904573], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 960, 14, 14], "float32"], ["TENSOR", [128, 960, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 960, 14, 14, "float32"], [128, 960, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4783222, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [24, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.688655548743102e-05], 0, 12.86388874053955, 1535557808.5567095], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 928, 14, 14], "float32"], ["TENSOR", [128, 928, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 928, 14, 14, "float32"], [128, 928, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 507382, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [58, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.016689842966923e-05], 0, 1.346564769744873, 1535558795.3216088], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 896, 14, 14], "float32"], ["TENSOR", [128, 896, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 896, 14, 14, "float32"], [128, 896, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 728214, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [32, 28]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.165852137701277e-05], 0, 1.437685489654541, 1535560456.7344701], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 864, 14, 14], "float32"], ["TENSOR", [128, 864, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 864, 14, 14, "float32"], [128, 864, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1060336, "c": null, "e": [["tile_f", "sp", [16, 2, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [36, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.687965669339914e-05], 0, 8.346641778945923, 1535561443.6733437], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 832, 14, 14], "float32"], ["TENSOR", [128, 832, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 832, 14, 14, "float32"], [128, 832, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 169462, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [52, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.555442124430956e-05], 0, 5.441713571548462, 1535562357.9449513], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 800, 14, 14], "float32"], ["TENSOR", [128, 800, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 800, 14, 14, "float32"], [128, 800, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3093622, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [20, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.046558225458468e-05], 0, 15.71262526512146, 1535563464.8701277], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 768, 14, 14], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 14, 14, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 230902, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [48, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.234744090267983e-05], 0, 6.256928205490112, 1535564394.6937435], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 736, 14, 14], "float32"], ["TENSOR", [128, 736, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 736, 14, 14, "float32"], [128, 736, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 138742, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [46, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.1410453554889675e-05], 0, 6.182973146438599, 1535565178.7289805], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 704, 14, 14], "float32"], ["TENSOR", [128, 704, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 704, 14, 14, "float32"], [128, 704, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 538129, "c": null, "e": [["tile_f", "sp", [16, 1, 4, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [88, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.870800489312387e-05], 0, 2.2132797241210938, 1535565942.2794137], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 672, 14, 14], "float32"], ["TENSOR", [128, 672, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 672, 14, 14, "float32"], [128, 672, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1864854, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [24, 28]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.229561833046472e-05], 0, 1.3550889492034912, 1535567724.982913], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 640, 14, 14], "float32"], ["TENSOR", [128, 640, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 640, 14, 14, "float32"], [128, 640, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1219702, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [32, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.477221223354958e-05], 0, 1.387446641921997, 1535568750.6347365], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 608, 14, 14], "float32"], ["TENSOR", [128, 608, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 608, 14, 14, "float32"], [128, 608, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 476656, "c": null, "e": [["tile_f", "sp", [16, 2, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [76, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.4095696955929124e-05], 0, 4.452491521835327, 1535569599.26808], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 576, 14, 14], "float32"], ["TENSOR", [128, 576, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 576, 14, 14, "float32"], [128, 576, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1521136, "c": null, "e": [["tile_f", "sp", [16, 2, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [48, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.237845636867933e-05], 0, 1.8098704814910889, 1535570428.9661424], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 544, 14, 14], "float32"], ["TENSOR", [128, 544, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 544, 14, 14, "float32"], [128, 544, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1213969, "c": null, "e": [["tile_f", "sp", [16, 1, 4, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [68, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.1499605432303545e-05], 0, 2.1479368209838867, 1535571183.5289185], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 108049, "c": null, "e": [["tile_f", "sp", [16, 1, 4, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.9696199288256228e-05], 0, 1.2669003009796143, 1535571947.9111545], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 480, 14, 14], "float32"], ["TENSOR", [128, 480, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 480, 14, 14, "float32"], [128, 480, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4168854, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [12, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.56696117003079e-05], 0, 10.758132457733154, 1535573553.205647], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 448, 14, 14], "float32"], ["TENSOR", [128, 448, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 14, 14, "float32"], [128, 448, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1096854, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [16, 28]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.320895284327323e-05], 0, 1.196434497833252, 1535575073.1209912], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 416, 14, 14], "float32"], ["TENSOR", [128, 416, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 416, 14, 14, "float32"], [128, 416, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1213936, "c": null, "e": [["tile_f", "sp", [16, 2, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [52, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.5349264418212478e-05], 0, 12.102940082550049, 1535575957.618665], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 384, 14, 14], "float32"], ["TENSOR", [128, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 14, 14, "float32"], [128, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1644016, "c": null, "e": [["tile_f", "sp", [16, 2, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [48, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.3481232681782646e-05], 0, 4.663531303405762, 1535576847.956911], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 352, 14, 14], "float32"], ["TENSOR", [128, 352, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 352, 14, 14, "float32"], [128, 352, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 108016, "c": null, "e": [["tile_f", "sp", [16, 2, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [44, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.2001159360797536e-05], 0, 2.2442550659179688, 1535577776.5404787], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 320, 14, 14], "float32"], ["TENSOR", [128, 320, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 320, 14, 14, "float32"], [128, 320, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1951216, "c": null, "e": [["tile_f", "sp", [16, 2, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.9427484680025856e-05], 0, 2.6154847145080566, 1535578615.708037], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 288, 14, 14], "float32"], ["TENSOR", [128, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 14, 14, "float32"], [128, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 169489, "c": null, "e": [["tile_f", "sp", [16, 1, 4, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [36, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.9033601698137118e-05], 0, 2.3652172088623047, 1535579507.9600186], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 14, 14, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 20361, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 1, 8, 2]], ["tile_x", "sp", [7, 1, 7, 1]], ["tile_rc", "sp", [8, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[2.5000261515844497e-05], 0, 12.293764114379883, 1535580383.487767], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 937489, "c": null, "e": [["tile_f", "sp", [16, 1, 4, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.758315576652602e-05], 0, 8.173214673995972, 1535581094.4785075], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [256, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [256, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3606637, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 2]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 14, 2]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[8.120121898597627e-05], 0, 1.2265729904174805, 1535582613.3266957], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 480, 28, 28], "float32"], ["TENSOR", [128, 480, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 480, 28, 28, "float32"], [128, 480, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6024857, "c": null, "e": [["tile_f", "sp", [8, 4, 4, 1]], ["tile_y", "sp", [7, 2, 2, 1]], ["tile_x", "sp", [2, 1, 14, 1]], ["tile_rc", "sp", [48, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.305960671378092e-05], 0, 1.3379249572753906, 1535583757.1784189], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 448, 28, 28], "float32"], ["TENSOR", [128, 448, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 28, 28, "float32"], [128, 448, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 9148823, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 1, 14, 2]], ["tile_rc", "sp", [32, 14]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.7931751037344395e-05], 0, 1.3435583114624023, 1535585470.9696934], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 416, 28, 28], "float32"], ["TENSOR", [128, 416, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 416, 28, 28, "float32"], [128, 416, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5692823, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 1, 14, 2]], ["tile_rc", "sp", [26, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.826996172402702e-05], 0, 1.3278939723968506, 1535587266.8153222], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 384, 28, 28], "float32"], ["TENSOR", [128, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 28, 28, "float32"], [128, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7368199, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [2, 1, 14, 1]], ["tile_rc", "sp", [32, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.423708785377358e-05], 0, 12.33829402923584, 1535590002.951454], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 352, 28, 28], "float32"], ["TENSOR", [128, 352, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 352, 28, 28, "float32"], [128, 352, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5692823, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 1, 14, 2]], ["tile_rc", "sp", [22, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.094261456628478e-05], 0, 2.164489984512329, 1535590750.0318356], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 320, 28, 28], "float32"], ["TENSOR", [128, 320, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 320, 28, 28, "float32"], [128, 320, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 9491839, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.790092597271349e-05], 0, 6.321907997131348, 1535591875.0671048], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 288, 28, 28], "float32"], ["TENSOR", [128, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 28, 28, "float32"], [128, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 15249775, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [24, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.548012210675484e-05], 0, 11.097039699554443, 1535592960.3593595], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2385743, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.167456558414171e-05], 0, 3.707690954208374, 1535594113.3787842], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 224, 28, 28], "float32"], ["TENSOR", [128, 224, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 224, 28, 28, "float32"], [128, 224, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 12561814, "c": null, "e": [["tile_f", "sp", [4, 1, 4, 8]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 14]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.746549643705463e-05], 0, 6.304758310317993, 1535595088.5418801], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 28, 28], "float32"], ["TENSOR", [128, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 28, 28, "float32"], [128, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3912199, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [2, 1, 14, 1]], ["tile_rc", "sp", [16, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.471388193643998e-05], 0, 1.2050926685333252, 1535597295.1675448], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 160, 28, 28], "float32"], ["TENSOR", [128, 160, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 28, 28, "float32"], [128, 160, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8339839, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [8, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.1152598169014084e-05], 0, 1.3271257877349854, 1535598536.5009234], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 209989, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [1, 1, 8, 4]], ["tile_x", "sp", [7, 1, 14, 2]], ["tile_rc", "sp", [4, 32]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[2.9420414022718368e-05], 0, 1.34669828414917, 1535600086.3033242], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2380823, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [8, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.8549898443291326e-05], 0, 1.366112232208252, 1535601471.6817746], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 9488423, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [28, 1, 1, 2]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[7.121302610346464e-05], 0, 2.1284446716308594, 1535604383.0814273], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 224, 56, 56], "float32"], ["TENSOR", [128, 224, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 224, 56, 56, "float32"], [128, 224, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 21776455, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [28, 1, 1, 2]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [28, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.357535901778154e-05], 0, 2.2265069484710693, 1535605660.2191901], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 192, 56, 56], "float32"], ["TENSOR", [128, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 56, 56, "float32"], [128, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 46344983, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [14, 1, 1, 4]], ["tile_x", "sp", [2, 1, 28, 1]], ["tile_rc", "sp", [32, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[5.734667162471396e-05], 0, 11.561939001083374, 1535606906.8334565], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 160, 56, 56], "float32"], ["TENSOR", [128, 160, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 56, 56, "float32"], [128, 160, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 13273108, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [7, 2, 1, 4]], ["tile_x", "sp", [7, 1, 8, 1]], ["tile_rc", "sp", [16, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.074225447909754e-05], 0, 5.721418619155884, 1535608547.5546439], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2525783, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [28, 1, 2, 1]], ["tile_x", "sp", [1, 7, 8, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.997187543252595e-05], 0, 1.9399163722991943, 1535610416.8082263], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 96, 56, 56], "float32"], ["TENSOR", [128, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 56, 56, "float32"], [128, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 21716480, "c": null, "e": [["tile_f", "sp", [2, 2, 8, 4]], ["tile_y", "sp", [7, 4, 2, 1]], ["tile_x", "sp", [7, 1, 8, 1]], ["tile_rc", "sp", [16, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.383850146429376e-05], 0, 6.234032154083252, 1535611391.4232936], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 204395, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [1, 1, 4, 8]], ["tile_x", "sp", [7, 1, 14, 2]], ["tile_rc", "sp", [8, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[7.82145242313705e-05], 0, 2.6251773834228516, 1535612238.94909], "v": 0.1} {"i": ["cuda -model=titanx", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 7, 7, "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {"i": 3739701, "c": null, "e": [["tile_f", "sp", [1, 8, 8, 1]], ["tile_y", "sp", [112, 1, 1, 1]], ["tile_x", "sp", [1, 7, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 7]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[8.801264570803718e-05], 0, 1.4477019309997559, 1535614680.9461718], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 2048, 7, 7], "float32"], ["TENSOR", [512, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 7, 7, "float32"], [512, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 184840, "c": null, "e": [["tile_f", "sp", [16, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [128, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.870914716312056e-05], 0, 1.8239610195159912, 1535340502.6141953], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [2048, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [2048, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 28435, "c": null, "e": [["tile_f", "sp", [64, 2, 16, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 1, 7]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.277302426160337e-05], 0, 1.2559425830841064, 1535341522.856728], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [2048, 1024, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [2048, 1024, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 276692, "c": null, "e": [["tile_f", "sp", [16, 4, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [128, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00014070911287313433], 0, 1.2780311107635498, 1535342217.8151824], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 267206, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [8, 1, 16, 4]], ["tile_x", "sp", [1, 1, 8, 2]], ["tile_rc", "sp", [32, 16]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[5.8324577519379845e-05], 0, 1.5616421699523926, 1535343022.7710319], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [512, 1024, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [512, 1024, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 54600, "c": null, "e": [["tile_f", "sp", [16, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.612890325552826e-05], 0, 1.328430414199829, 1535343755.3037415], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [256, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [256, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1120051, "c": null, "e": [["tile_f", "sp", [8, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [64, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.8724735294117643e-05], 0, 1.302473783493042, 1535344763.7252576], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [1024, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [1024, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1589056, "c": null, "e": [["tile_f", "sp", [16, 4, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.555625907000511e-05], 0, 1.39487886428833, 1535345590.4587004], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [1024, 512, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [1024, 512, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3917436, "c": null, "e": [["tile_f", "sp", [32, 2, 8, 2]], ["tile_y", "sp", [7, 2, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[6.519168720173537e-05], 0, 5.207885265350342, 1535347591.9824076], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 35585, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [16, 2, 1, 8]], ["tile_x", "sp", [1, 1, 49, 1]], ["tile_rc", "sp", [32, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "winograd"}], "r": [[4.633917572575664e-05], 0, 2.31782603263855, 1535348964.346931], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [256, 512, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [256, 512, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 147911, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 2]], ["tile_y", "sp", [14, 1, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.6686891858624934e-05], 0, 1.2715818881988525, 1535349821.0645304], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2579787, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.9005322125603865e-05], 0, 4.739739656448364, 1535351179.3912873], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [512, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [512, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 9659582, "c": null, "e": [["tile_f", "sp", [4, 4, 32, 1]], ["tile_y", "sp", [7, 1, 1, 4]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.9628529266067004e-05], 0, 1.2400987148284912, 1535352477.7351887], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1152842, "c": null, "e": [["tile_f", "sp", [4, 4, 32, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.6994346804511274e-05], 0, 1.7129621505737305, 1535353304.6144009], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 435771, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [4, 4, 4, 2]], ["tile_x", "sp", [2, 2, 49, 1]], ["tile_rc", "sp", [8, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[3.5854004442366145e-05], 0, 1.4333581924438477, 1535354268.01083], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [128, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7756834, "c": null, "e": [["tile_f", "sp", [1, 2, 64, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.339707406829877e-05], 0, 1.3113553524017334, 1535355686.5242553], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [64, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [64, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 16853780, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 1]], ["tile_y", "sp", [56, 1, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.6182050362193858e-05], 0, 1.6612598896026611, 1535356896.8242538], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [256, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [256, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 26764352, "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [14, 1, 1, 4]], ["tile_x", "sp", [2, 1, 28, 1]], ["tile_rc", "sp", [4, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.3482599499765514e-05], 0, 5.5789735317230225, 1535358026.6404517], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 7, 7, "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {"i": 43253325, "c": null, "e": [["tile_f", "sp", [1, 4, 8, 2]], ["tile_y", "sp", [112, 1, 1, 1]], ["tile_x", "sp", [1, 7, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 7]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.1339853208482515e-05], 0, 1.3942315578460693, 1535361338.131421], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 121867, "c": null, "e": [["tile_f", "sp", [64, 2, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[7.052611001410438e-05], 0, 1.422020435333252, 1535362447.7432306], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 25761, "c": null, "e": [["tile_f", "sp", [256, 1, 4, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.5548221728452815e-06], 0, 1.1385302543640137, 1535363136.5671299], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [1024, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [1024, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 153954, "c": null, "e": [["tile_f", "sp", [64, 2, 4, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.496040041879944e-05], 0, 3.2046194076538086, 1535363578.4696045], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 19800, "c": null, "e": [["tile_f", "sp", [512, 1, 1, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.5496212216845505e-06], 0, 2.240983724594116, 1535364179.7086353], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1324913, "c": null, "e": [["tile_f", "sp", [32, 2, 4, 2]], ["tile_y", "sp", [2, 1, 7, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.495791946107784e-05], 0, 8.172152519226074, 1535364741.963095], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 254540, "c": null, "e": [["tile_f", "sp", [512, 1, 1, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.878607927128222e-06], 0, 1.8255438804626465, 1535365493.1087449], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 198461, "c": null, "e": [["tile_f", "sp", [32, 4, 4, 1]], ["tile_y", "sp", [2, 1, 7, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.3922045382165605e-05], 0, 3.5875909328460693, 1535366144.9359715], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 191400, "c": null, "e": [["tile_f", "sp", [256, 1, 1, 1]], ["tile_y", "sp", [1, 1, 14, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.2485037789792506e-06], 0, 1.1708734035491943, 1535366785.9483972], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [256, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1168397, "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.2439865390367933e-05], 0, 4.479329586029053, 1535368220.7407424], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1172325, "c": null, "e": [["tile_f", "sp", [256, 1, 1, 1]], ["tile_y", "sp", [1, 2, 7, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[5.0971720167125235e-06], 0, 1.5838329792022705, 1535368964.1501336], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7504397, "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [8, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.2662810974597377e-05], 0, 1.6510009765625, 1535370215.2665138], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 852720, "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [1, 1, 14, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[6.131742867638056e-06], 0, 3.9804959297180176, 1535370929.9425554], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 21004976, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 2]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.1988892341356673e-05], 0, 3.9282212257385254, 1535371747.987445], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 3356280, "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [7, 2, 1, 4]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.514915720717863e-06], 0, 1.3253803253173828, 1535372645.1413662], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3340948, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [4, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.1697967719298246e-05], 0, 4.008930683135986, 1535373250.6288095], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 112, 112, "float32"], [64, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 2346036, "c": null, "e": [["tile_f", "sp", [64, 1, 1, 1]], ["tile_y", "sp", [14, 2, 2, 1]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.5159026368760064e-05], 0, 2.2290358543395996, 1535373978.812705], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [64, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 112, 112, "float32"], [64, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7059804, "c": null, "e": [["tile_f", "sp", [1, 4, 16, 1]], ["tile_y", "sp", [56, 1, 1, 2]], ["tile_x", "sp", [2, 2, 28, 1]], ["tile_rc", "sp", [2, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.749397590782123e-05], 0, 5.936538457870483, 1535375090.9615788], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [32, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 32, 112, 112, "float32"], [32, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 4741632, "c": null, "e": [["tile_f", "sp", [32, 1, 1, 1]], ["tile_y", "sp", [7, 1, 2, 8]], ["tile_x", "sp", [1, 1, 112, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.589733033477568e-06], 0, 1.2825348377227783, 1535375960.1960888], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [32, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [32, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 33218209, "c": null, "e": [["tile_f", "sp", [1, 4, 8, 1]], ["tile_y", "sp", [28, 4, 1, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.010032462686567e-06], 0, 1.3537733554840088, 1535377010.26167], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 75716, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [8, 4, 16, 1]], ["tile_x", "sp", [1, 7, 7, 1]], ["tile_rc", "sp", [32, 16]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00011470169923664122], 0, 1.6527025699615479, 1535378530.7402716], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1169651, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 2, 32, 4]], ["tile_x", "sp", [7, 7, 4, 1]], ["tile_rc", "sp", [64, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0002516921139028475], 0, 2.42842698097229, 1535379736.2749858], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 861650, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [4, 1, 32, 4]], ["tile_x", "sp", [7, 7, 4, 1]], ["tile_rc", "sp", [32, 8]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0001474504529411765], 0, 1.4133880138397217, 1535380740.653147], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 332746, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 1, 32, 4]], ["tile_x", "sp", [7, 7, 4, 1]], ["tile_rc", "sp", [64, 4]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "winograd"}], "r": [[0.00022330364635958397], 0, 8.144659042358398, 1535381829.4200964], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 448246, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 1, 32, 4]], ["tile_x", "sp", [7, 7, 4, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0001520982654508612], 0, 5.780686140060425, 1535382802.5636008], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1813376, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 4, 8, 2]], ["tile_x", "sp", [7, 7, 16, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00027669260477941176], 0, 6.847986459732056, 1535385301.3655443], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1309376, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 4, 8, 2]], ["tile_x", "sp", [7, 7, 16, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00020200159623149397], 0, 4.39305853843689, 1535386443.8930204], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 2124768, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [1, 2, 4, 8]], ["tile_x", "sp", [49, 4, 16, 1]], ["tile_rc", "sp", [16, 4]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00048387853225806453], 0, 7.0284013748168945, 1535388162.2723079], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 128325251, "c": null, "e": [["tile_f", "sp", [1, 2, 16, 2]], ["tile_y", "sp", [112, 2, 1, 1]], ["tile_x", "sp", [7, 1, 16, 2]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.3269261095100864e-05], 0, 1.526334524154663, 1535389977.55576], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 110920, "c": null, "e": [["tile_f", "sp", [16, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.3203476454537452e-05], 0, 1.3433687686920166, 1535350110.543228], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 737474, "c": null, "e": [["tile_f", "sp", [32, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.82280392670157e-05], 0, 2.3709828853607178, 1535350877.6429849], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1161671, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 2]], ["tile_y", "sp", [14, 1, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.388735292477555e-06], 0, 1.473917007446289, 1535353812.2598486], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 5183051, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [32, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.732052392947103e-05], 0, 1.3970696926116943, 1535355518.4190004], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4876831, "c": null, "e": [["tile_f", "sp", [2, 2, 32, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [4, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.025047433035715e-06], 0, 1.389564037322998, 1535358581.229104], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 25804827, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [32, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.629003140096618e-05], 0, 1.433337926864624, 1535360709.7012227], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2338581, "c": null, "e": [["tile_f", "sp", [1, 8, 8, 1]], ["tile_y", "sp", [56, 1, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [4, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[7.529723254546355e-06], 0, 2.2757744789123535, 1535362192.8535852], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 28125, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 2, 2, 8]], ["tile_x", "sp", [1, 2, 98, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "winograd"}], "r": [[3.7446556775642626e-05], 0, 3.851289749145508, 1535364277.7394183], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [192, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [192, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8797896, "c": null, "e": [["tile_f", "sp", [6, 1, 32, 1]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [64, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.1822753712346206e-05], 0, 1.297123670578003, 1535391282.1496253], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [448, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [448, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 868691, "c": null, "e": [["tile_f", "sp", [14, 1, 16, 2]], ["tile_y", "sp", [2, 2, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [32, 64]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.868886900129702e-05], 0, 6.360664129257202, 1535392201.6822503], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [384, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [384, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8156810, "c": null, "e": [["tile_f", "sp", [12, 1, 16, 2]], ["tile_y", "sp", [2, 1, 2, 2]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [32, 64]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.840593352694418e-05], 0, 1.2624528408050537, 1535392929.3049552], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [320, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [320, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5573259, "c": null, "e": [["tile_f", "sp", [40, 2, 4, 1]], ["tile_y", "sp", [2, 1, 4, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [64, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.779546543485186e-05], 0, 1.4392306804656982, 1535394058.0221245], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [192, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [192, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8394696, "c": null, "e": [["tile_f", "sp", [6, 1, 32, 1]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [40, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.04307966765739e-05], 0, 5.328145265579224, 1535394950.332171], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 448, 8, 8], "float32"], ["TENSOR", [384, 448, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 8, 8, "float32"], [384, 448, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 368726, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [12, 1, 32, 1]], ["tile_x", "sp", [1, 1, 2, 2]], ["tile_rc", "sp", [28, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[8.091512297734628e-05], 0, 3.577399730682373, 1535396588.173184], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [448, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [448, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 13407875, "c": null, "e": [["tile_f", "sp", [14, 1, 16, 2]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [32, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.643936534740545e-05], 0, 1.5609734058380127, 1535398040.0147324], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 384, 8, 8], "float32"], ["TENSOR", [384, 384, 3, 1], "float32"], [1, 1], [1, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 8, 8, "float32"], [384, 384, 3, 1, "float32"], [1, 1], [1, 0], [1, 1], "NCHW", "float32"], {"i": 10648394, "c": null, "e": [["tile_f", "sp", [12, 2, 16, 1]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [24, 16]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.7830928399183824e-05], 0, 1.3832390308380127, 1535398954.3528938], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 384, 8, 8], "float32"], ["TENSOR", [384, 384, 1, 3], "float32"], [1, 1], [0, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 8, 8, "float32"], [384, 384, 1, 3, "float32"], [1, 1], [0, 1], [1, 1], "NCHW", "float32"], {"i": 26392394, "c": null, "e": [["tile_f", "sp", [12, 2, 16, 1]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [12, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.5432101456639564e-05], 0, 1.7413804531097412, 1535399941.1696503], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [384, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [384, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1683420, "c": null, "e": [["tile_f", "sp", [12, 4, 8, 1]], ["tile_y", "sp", [2, 1, 4, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [40, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.873922139303483e-05], 0, 1.2541792392730713, 1535400723.5157862], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [320, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [320, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3731075, "c": null, "e": [["tile_f", "sp", [10, 1, 16, 2]], ["tile_y", "sp", [4, 1, 2, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [32, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.614155295545155e-05], 0, 1.513995885848999, 1535402527.1831696], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2614152, "c": null, "e": [["tile_f", "sp", [6, 1, 32, 1]], ["tile_y", "sp", [8, 1, 1, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [24, 8]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.91711075310559e-05], 0, 1.7116973400115967, 1535404126.9687028], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [320, 192, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [320, 192, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6364244, "c": null, "e": [["tile_f", "sp", [10, 1, 32, 1]], ["tile_y", "sp", [4, 2, 1, 1]], ["tile_x", "sp", [2, 1, 4, 1]], ["tile_rc", "sp", [24, 8]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.243342838654012e-05], 0, 1.4600000381469727, 1535405934.3806696], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 626349, "c": null, "e": [["tile_f", "sp", [8, 4, 6, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [32, 6]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.678651497695853e-05], 0, 3.809021234512329, 1535407164.0710874], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 776877, "c": null, "e": [["tile_f", "sp", [8, 4, 6, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [32, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.552685176738883e-05], 0, 4.9299421310424805, 1535408070.3316078], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [192, 160, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [192, 160, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 540340, "c": null, "e": [["tile_f", "sp", [6, 4, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[6.962713978985839e-05], 0, 2.6122801303863525, 1535408801.494736], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [160, 160, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [160, 160, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 234784, "c": null, "e": [["tile_f", "sp", [8, 4, 5, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 10]], ["tile_ry", "sp", [1, 7]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.338450232361639e-05], 0, 6.433482646942139, 1535409694.7188215], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [192, 160, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [192, 160, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 733989, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 3]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [1, 7]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.02349209171736e-05], 0, 1.5989787578582764, 1535410517.0261497], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [160, 160, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [160, 160, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 19838, "c": null, "e": [["tile_f", "sp", [8, 1, 5, 4]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.227214694894146e-05], 0, 6.451417684555054, 1535411577.5658758], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [160, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [160, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 220455, "c": null, "e": [["tile_f", "sp", [4, 5, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [48, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.308726506364922e-05], 0, 5.126100778579712, 1535412695.7309365], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [192, 128, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [192, 128, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 147966, "c": null, "e": [["tile_f", "sp", [6, 2, 8, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 7]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.653925856228829e-05], 0, 1.454458475112915, 1535413420.592295], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [128, 128, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [128, 128, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 22102, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 7]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.2646732013385384e-05], 0, 1.247044324874878, 1535414154.7640848], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [192, 128, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [192, 128, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 233982, "c": null, "e": [["tile_f", "sp", [6, 2, 8, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 7]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.647298758465011e-05], 0, 2.7552649974823, 1535415102.1237185], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [128, 128, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [128, 128, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 175734, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 7]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.234621726941408e-05], 0, 4.006468772888184, 1535415876.2920923], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 45174, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 2]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [96, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.568510146989094e-05], 0, 2.430988311767578, 1535416680.345598], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [192, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [192, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 35137, "c": null, "e": [["tile_f", "sp", [16, 1, 3, 4]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [64, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.250999441808541e-05], 0, 1.5181522369384766, 1535418207.809458], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 96, 35, 35], "float32"], ["TENSOR", [96, 96, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 35, 35, "float32"], [96, 96, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 664874, "c": null, "e": [["tile_f", "sp", [6, 2, 8, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [12, 8]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.5635400142483974e-05], 0, 2.2501699924468994, 1535419527.4759367], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [384, 288, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [384, 288, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 979233, "c": null, "e": [["tile_f", "sp", [16, 8, 3, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [144, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0002292356246200608], 0, 4.310108661651611, 1535420473.161267], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [48, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [48, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2741780, "c": null, "e": [["tile_f", "sp", [4, 4, 3, 1]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [48, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.7069100853548967e-05], 0, 1.666454553604126, 1535421234.2659154], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [64, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [64, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 223628, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 1]], ["tile_y", "sp", [5, 1, 7, 1]], ["tile_x", "sp", [5, 1, 7, 1]], ["tile_rc", "sp", [12, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.0876184275521405e-05], 0, 1.3121867179870605, 1535422386.6796708], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 35, 35], "float32"], ["TENSOR", [48, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 35, 35, "float32"], [48, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 161306, "c": null, "e": [["tile_f", "sp", [3, 4, 4, 1]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.6682681009561928e-05], 0, 1.4955830574035645, 1535423237.755808], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 35, 35], "float32"], ["TENSOR", [64, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 35, 35, "float32"], [64, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 75304, "c": null, "e": [["tile_f", "sp", [4, 2, 4, 2]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.9178238174379953e-05], 0, 2.4978082180023193, 1535423921.9796646], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [32, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [32, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 120301, "c": null, "e": [["tile_f", "sp", [2, 4, 4, 1]], ["tile_y", "sp", [7, 1, 5, 1]], ["tile_x", "sp", [5, 1, 7, 1]], ["tile_rc", "sp", [8, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.0797558232075604e-05], 0, 1.6644856929779053, 1535425238.4115841], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 96, 35, 35], "float32"], ["TENSOR", [96, 96, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 35, 35, "float32"], [96, 96, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 4409403, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [3, 1, 4, 8]], ["tile_x", "sp", [1, 4, 81, 1]], ["tile_rc", "sp", [4, 24]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[3.319664817679558e-05], 0, 2.4141483306884766, 1535427557.7824433], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 35, 35], "float32"], ["TENSOR", [96, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 35, 35, "float32"], [96, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 2449305, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [3, 4, 4, 2]], ["tile_x", "sp", [1, 4, 81, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[2.5309661492238357e-05], 0, 1.4485929012298584, 1535429272.3281975], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 48, 35, 35], "float32"], ["TENSOR", [64, 48, 5, 5], "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 48, 35, 35, "float32"], [64, 48, 5, 5, "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {"i": 4140319, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 2]], ["tile_y", "sp", [7, 1, 1, 5]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [16, 3]], ["tile_ry", "sp", [1, 5]], ["tile_rx", "sp", [1, 5]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.883521648815319e-05], 0, 9.28942060470581, 1535430556.1709695], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [48, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [48, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 663060, "c": null, "e": [["tile_f", "sp", [4, 4, 3, 1]], ["tile_y", "sp", [35, 1, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [32, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.1977059175372836e-05], 0, 2.836064100265503, 1535431600.069139], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [64, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [64, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1083788, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 1]], ["tile_y", "sp", [5, 1, 7, 1]], ["tile_x", "sp", [5, 1, 7, 1]], ["tile_rc", "sp", [8, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.4761395499656088e-05], 0, 1.3095550537109375, 1535432754.9066033], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 80, 73, 73], "float32"], ["TENSOR", [192, 80, 3, 3], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 80, 73, 73, "float32"], [192, 80, 3, 3, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 922099, "c": null, "e": [["tile_f", "sp", [6, 4, 4, 2]], ["tile_y", "sp", [71, 1, 1, 1]], ["tile_x", "sp", [1, 1, 71, 1]], ["tile_rc", "sp", [40, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00037999983291139237], 0, 1.333446979522705, 1535433665.455902], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 64, 73, 73], "float32"], ["TENSOR", [80, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 73, 73, "float32"], [80, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 39282, "c": null, "e": [["tile_f", "sp", [5, 2, 2, 4]], ["tile_y", "sp", [73, 1, 1, 1]], ["tile_x", "sp", [1, 1, 73, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.2667290093627303e-05], 0, 1.5402162075042725, 1535434456.8025699], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 32, 147, 147], "float32"], ["TENSOR", [64, 32, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 147, 147, "float32"], [64, 32, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 18733533, "c": null, "e": [["tile_f", "sp", [1, 8, 8, 1]], ["tile_y", "sp", [49, 1, 1, 3]], ["tile_x", "sp", [3, 1, 49, 1]], ["tile_rc", "sp", [16, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00013859728162511541], 0, 4.481893539428711, 1535435277.8664289], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 32, 149, 149], "float32"], ["TENSOR", [32, 32, 3, 3], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 149, 149, "float32"], [32, 32, 3, 3, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6050372, "c": null, "e": [["tile_f", "sp", [1, 1, 32, 1]], ["tile_y", "sp", [21, 7, 1, 1]], ["tile_x", "sp", [7, 1, 7, 3]], ["tile_rc", "sp", [16, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.415943226079594e-05], 0, 1.405092716217041, 1535437071.6393325], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 3, 299, 299], "float32"], ["TENSOR", [32, 3, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 299, 299, "float32"], [32, 3, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 27342, "c": null, "e": [["tile_f", "sp", [1, 8, 4, 1]], ["tile_y", "sp", [149, 1, 1, 1]], ["tile_x", "sp", [1, 1, 149, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.0394228291887484e-05], 0, 2.09767746925354, 1535437921.065245], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 992, 7, 7], "float32"], ["TENSOR", [128, 992, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 992, 7, 7, "float32"], [128, 992, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 108510, "c": null, "e": [["tile_f", "sp", [4, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [8, 124]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.010334864080232e-05], 0, 1.5134379863739014, 1535510578.9815462], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 960, 7, 7], "float32"], ["TENSOR", [128, 960, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 960, 7, 7, "float32"], [128, 960, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 237150, "c": null, "e": [["tile_f", "sp", [4, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [48, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.905045659898477e-05], 0, 1.168386697769165, 1535511125.4239872], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 928, 7, 7], "float32"], ["TENSOR", [128, 928, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 928, 7, 7, "float32"], [128, 928, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 100826, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [58, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.9401293562564634e-05], 0, 1.3234310150146484, 1535511541.6431155], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 896, 7, 7], "float32"], ["TENSOR", [128, 896, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 896, 7, 7, "float32"], [128, 896, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 168015, "c": null, "e": [["tile_f", "sp", [32, 1, 4, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 28]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.4511918993879114e-05], 0, 2.1898694038391113, 1535512300.6017644], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 864, 7, 7], "float32"], ["TENSOR", [128, 864, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 864, 7, 7, "float32"], [128, 864, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 212186, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [18, 48]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.7799368040748638e-05], 0, 1.2502412796020508, 1535512769.6244454], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 832, 7, 7], "float32"], ["TENSOR", [128, 832, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 832, 7, 7, "float32"], [128, 832, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 118106, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [52, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.7712727648030198e-05], 0, 1.373544454574585, 1535513219.419708], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 800, 7, 7], "float32"], ["TENSOR", [128, 800, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 800, 7, 7, "float32"], [128, 800, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 152670, "c": null, "e": [["tile_f", "sp", [4, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [40, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.6151012154458428e-05], 0, 1.2929022312164307, 1535513641.1468432], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 768, 7, 7], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 7, 7, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 152666, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [48, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.6367799781778505e-05], 0, 1.3143672943115234, 1535514062.8160617], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 736, 7, 7], "float32"], ["TENSOR", [128, 736, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 736, 7, 7, "float32"], [128, 736, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8666, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [46, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.5746391098981842e-05], 0, 1.349114179611206, 1535514467.9158273], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 704, 7, 7], "float32"], ["TENSOR", [128, 704, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 704, 7, 7, "float32"], [128, 704, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 118106, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [44, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.5157039765845782e-05], 0, 1.1946027278900146, 1535514920.2627847], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 672, 7, 7], "float32"], ["TENSOR", [128, 672, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 672, 7, 7, "float32"], [128, 672, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 116175, "c": null, "e": [["tile_f", "sp", [32, 1, 4, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [24, 28]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.1306232711966325e-05], 0, 1.6486470699310303, 1535515516.133535], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 640, 7, 7], "float32"], ["TENSOR", [128, 640, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 640, 7, 7, "float32"], [128, 640, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 137310, "c": null, "e": [["tile_f", "sp", [4, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.3417131932021466e-05], 0, 1.2014439105987549, 1535515972.7182794], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 608, 7, 7], "float32"], ["TENSOR", [128, 608, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 608, 7, 7, "float32"], [128, 608, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 31706, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [38, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.3372493136031379e-05], 0, 1.2704458236694336, 1535516428.3896213], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 576, 7, 7], "float32"], ["TENSOR", [128, 576, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 576, 7, 7, "float32"], [128, 576, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 187226, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [12, 48]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.2496573377156944e-05], 0, 1.2879750728607178, 1535517005.3094254], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 544, 7, 7], "float32"], ["TENSOR", [128, 544, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 544, 7, 7, "float32"], [128, 544, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 112346, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [2, 272]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.2027159786420146e-05], 0, 1.5824146270751953, 1535517491.8704495], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 7, 7], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 7, 7, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 75730, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 1, 16, 1]], ["tile_x", "sp", [2, 1, 4, 2]], ["tile_rc", "sp", [2, 64]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[8.30705703527327e-06], 0, 6.790492057800293, 1535518080.136758], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8666, "c": null, "e": [["tile_f", "sp", [8, 1, 16, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.1499615007281368e-05], 0, 1.2344355583190918, 1535518522.5797985], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [512, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [512, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 254349, "c": null, "e": [["tile_f", "sp", [16, 4, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [64, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.065668067906225e-05], 0, 1.1561696529388428, 1535519470.7963555], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 992, 14, 14], "float32"], ["TENSOR", [128, 992, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 992, 14, 14, "float32"], [128, 992, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 507382, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [62, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.5209433299731184e-05], 0, 1.4269888401031494, 1535520041.0994248], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 960, 14, 14], "float32"], ["TENSOR", [128, 960, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 960, 14, 14, "float32"], [128, 960, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2903547, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [60, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.521613414019163e-05], 0, 1.337665319442749, 1535520693.654924], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 928, 14, 14], "float32"], ["TENSOR", [128, 928, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 928, 14, 14, "float32"], [128, 928, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1244662, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [58, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.378593229744728e-05], 0, 1.4942715167999268, 1535521353.5999422], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 896, 14, 14], "float32"], ["TENSOR", [128, 896, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 896, 14, 14, "float32"], [128, 896, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1701616, "c": null, "e": [["tile_f", "sp", [16, 2, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [2, 1, 7, 1]], ["tile_rc", "sp", [32, 28]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.22431098265896e-05], 0, 5.287805080413818, 1535522295.2589383], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 864, 14, 14], "float32"], ["TENSOR", [128, 864, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 864, 14, 14, "float32"], [128, 864, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2473462, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [54, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.2237321375630003e-05], 0, 2.585005283355713, 1535522937.8135786], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 832, 14, 14], "float32"], ["TENSOR", [128, 832, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 832, 14, 14, "float32"], [128, 832, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 169462, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [52, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.1596151432272925e-05], 0, 4.9706690311431885, 1535523507.5953581], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 800, 14, 14], "float32"], ["TENSOR", [128, 800, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 800, 14, 14, "float32"], [128, 800, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1859062, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [50, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.053800984548065e-05], 0, 1.2881567478179932, 1535524279.4223301], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 768, 14, 14], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 14, 14, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 230907, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [48, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.9923293531677513e-05], 0, 3.857074737548828, 1535524878.4241247], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 736, 14, 14], "float32"], ["TENSOR", [128, 736, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 736, 14, 14, "float32"], [128, 736, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 876022, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [46, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.9062312920848685e-05], 0, 7.322245359420776, 1535525558.424067], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 704, 14, 14], "float32"], ["TENSOR", [128, 704, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 704, 14, 14, "float32"], [128, 704, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1459707, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [44, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.8408509690873404e-05], 0, 3.769604444503784, 1535526161.9636657], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 672, 14, 14], "float32"], ["TENSOR", [128, 672, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 672, 14, 14, "float32"], [128, 672, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2504187, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [42, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.768176361065284e-05], 0, 3.958688497543335, 1535526768.0907528], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 640, 14, 14], "float32"], ["TENSOR", [128, 640, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 640, 14, 14, "float32"], [128, 640, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 200187, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [40, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.7005916666666666e-05], 0, 1.2610886096954346, 1535527376.862039], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 608, 14, 14], "float32"], ["TENSOR", [128, 608, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 608, 14, 14, "float32"], [128, 608, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1244662, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [38, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.613369627876963e-05], 0, 1.232668161392212, 1535528073.8818192], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 576, 14, 14], "float32"], ["TENSOR", [128, 576, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 576, 14, 14, "float32"], [128, 576, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2196982, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [36, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.5461951984371785e-05], 0, 6.229677200317383, 1535528790.1099694], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 544, 14, 14], "float32"], ["TENSOR", [128, 544, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 544, 14, 14, "float32"], [128, 544, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 138747, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [34, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.4684608750978857e-05], 0, 4.6408140659332275, 1535529488.5018454], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1060342, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.3865336010338778e-05], 0, 1.2443079948425293, 1535530103.911455], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 480, 14, 14], "float32"], ["TENSOR", [128, 480, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 480, 14, 14, "float32"], [128, 480, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 323062, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [30, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.3085105267748126e-05], 0, 2.159207344055176, 1535530789.2544587], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 448, 14, 14], "float32"], ["TENSOR", [128, 448, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 14, 14, "float32"], [128, 448, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2442769, "c": null, "e": [["tile_f", "sp", [16, 1, 4, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [8, 56]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.1815016365066875e-05], 0, 2.2030670642852783, 1535531790.218784], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 416, 14, 14], "float32"], ["TENSOR", [128, 416, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 416, 14, 14, "float32"], [128, 416, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1275382, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [26, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.1618833707517225e-05], 0, 1.2819890975952148, 1535532505.0172954], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 384, 14, 14], "float32"], ["TENSOR", [128, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 14, 14, "float32"], [128, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 230902, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [24, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.0968731876644256e-05], 0, 8.432388544082642, 1535533185.603903], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 352, 14, 14], "float32"], ["TENSOR", [128, 352, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 352, 14, 14, "float32"], [128, 352, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 169467, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [22, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.0255205892405495e-05], 0, 2.2535476684570312, 1535533836.1736326], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 320, 14, 14], "float32"], ["TENSOR", [128, 320, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 320, 14, 14, "float32"], [128, 320, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1490427, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [20, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.516839233599796e-06], 0, 5.255681037902832, 1535534486.7834814], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 288, 14, 14], "float32"], ["TENSOR", [128, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 14, 14, "float32"], [128, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1920507, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [18, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.70503696181966e-06], 0, 1.3429267406463623, 1535535109.3423762], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 14, 14, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 26631, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 2, 4, 2]], ["tile_x", "sp", [1, 1, 49, 1]], ["tile_rc", "sp", [1, 128]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[1.2433500124264767e-05], 0, 2.377492904663086, 1535535687.8450365], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 415222, "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[7.865961681411298e-06], 0, 6.301952362060547, 1535536188.88553], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [256, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [256, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 9030482, "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [2, 7, 2, 1]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.274914188034188e-05], 0, 1.4122755527496338, 1535537441.3448718], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 480, 28, 28], "float32"], ["TENSOR", [128, 480, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 480, 28, 28, "float32"], [128, 480, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2193775, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [24, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.385060962482191e-05], 0, 1.294996738433838, 1535538590.7639327], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 448, 28, 28], "float32"], ["TENSOR", [128, 448, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 28, 28, "float32"], [128, 448, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1425743, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 28]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.4060694725028058e-05], 0, 1.290968894958496, 1535539867.135623], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 416, 28, 28], "float32"], ["TENSOR", [128, 416, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 416, 28, 28, "float32"], [128, 416, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7953748, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [26, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.562269837745517e-05], 0, 4.757439851760864, 1535540693.427137], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 384, 28, 28], "float32"], ["TENSOR", [128, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 28, 28, "float32"], [128, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 11217748, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [8, 48]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.2392697674418605e-05], 0, 1.334132194519043, 1535541874.2043667], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 352, 28, 28], "float32"], ["TENSOR", [128, 352, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 352, 28, 28, "float32"], [128, 352, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3345628, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [22, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.204317429577465e-05], 0, 4.975332975387573, 1535543293.7426958], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 320, 28, 28], "float32"], ["TENSOR", [128, 320, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 320, 28, 28, "float32"], [128, 320, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 9491839, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.6622234280030944e-05], 0, 2.4568378925323486, 1535544351.0567925], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 288, 28, 28], "float32"], ["TENSOR", [128, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 28, 28, "float32"], [128, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2387839, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [8, 36]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.6870072167266188e-05], 0, 1.5000736713409424, 1535545510.5365977], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5841778, "c": null, "e": [["tile_f", "sp", [4, 1, 16, 2]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.5711118778753657e-05], 0, 1.3064296245574951, 1535546967.341969], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 224, 28, 28], "float32"], ["TENSOR", [128, 224, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 224, 28, 28, "float32"], [128, 224, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3345623, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 14]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.3947885016136469e-05], 0, 3.1074414253234863, 1535548002.22679], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 28, 28], "float32"], ["TENSOR", [128, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 28, 28, "float32"], [128, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2001748, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [4, 48]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[1.2573500542978866e-05], 0, 1.5108652114868164, 1535549122.14382], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 160, 28, 28], "float32"], ["TENSOR", [128, 160, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 28, 28, "float32"], [128, 160, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8337775, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [8, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.355404564444722e-06], 0, 1.492844581604004, 1535550192.1845133], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 209989, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [1, 1, 8, 4]], ["tile_x", "sp", [7, 1, 14, 2]], ["tile_rc", "sp", [4, 32]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[1.5450798312493645e-05], 0, 1.48533034324646, 1535551019.5225036], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5267818, "c": null, "e": [["tile_f", "sp", [4, 1, 16, 2]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.9109391835766e-06], 0, 3.3182780742645264, 1535552192.8403792], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2566943, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [14, 2, 1, 2]], ["tile_x", "sp", [2, 1, 28, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.759123719542516e-05], 0, 3.0082433223724365, 1535553514.2241092], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 224, 56, 56], "float32"], ["TENSOR", [128, 224, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 224, 56, 56, "float32"], [128, 224, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 21004996, "c": null, "e": [["tile_f", "sp", [4, 2, 4, 4]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [32, 7]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[3.3200856352368304e-05], 0, 1.5159938335418701, 1535554700.846309], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 192, 56, 56], "float32"], ["TENSOR", [128, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 56, 56, "float32"], [128, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 37133000, "c": null, "e": [["tile_f", "sp", [2, 2, 8, 4]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [16, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[2.982201131176821e-05], 0, 1.5415749549865723, 1535555668.8435583], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 160, 56, 56], "float32"], ["TENSOR", [128, 160, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 56, 56, "float32"], [128, 160, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5644948, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [8, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[2.5671416794937575e-05], 0, 1.374856948852539, 1535558237.050249], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 96, 56, 56], "float32"], ["TENSOR", [128, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 56, 56, "float32"], [128, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 32528480, "c": null, "e": [["tile_f", "sp", [2, 2, 8, 4]], ["tile_y", "sp", [28, 1, 1, 2]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [8, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[1.5659986445626108e-05], 0, 4.918555021286011, 1535560222.7970054], "v": 0.1} {"i": ["cuda -model=1080ti", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 203213, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [1, 1, 8, 4]], ["tile_x", "sp", [7, 1, 28, 1]], ["tile_rc", "sp", [8, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[4.457051544860368e-05], 0, 2.7779624462127686, 1535560904.415111], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 2048, 7, 7], "float32"], ["TENSOR", [512, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 7, 7, "float32"], [512, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 58336, "e": [["tile_f", "sp", [8, 4, 16, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [128, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "c": null}], "r": [[0.0005035017], 0, 12.29834508895874, 1554955488.1772192], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [2048, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [2048, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 254471, "e": [["tile_f", "sp", [64, 4, 8, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0005412969999999999], 0, 1.7040576934814453, 1554977839.9065943], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [2048, 1024, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [2048, 1024, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 142144, "e": [["tile_f", "sp", [16, 1, 32, 4]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 7, 1, 1]], ["tile_rc", "sp", [256, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "c": null}], "r": [[0.0013291223], 0, 3.519747257232666, 1554970439.7118416], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [512, 1024, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [512, 1024, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 9586, "e": [["tile_f", "sp", [8, 1, 16, 4]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [256, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "c": null}], "r": [[0.00040170630000000004], 0, 10.144535064697266, 1554962211.3523424], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [256, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [256, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 2488262, "e": [["tile_f", "sp", [8, 4, 4, 2]], ["tile_y", "sp", [1, 1, 14, 1]], ["tile_x", "sp", [1, 1, 2, 7]], ["tile_rc", "sp", [128, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.00034071700000000004], 0, 9.662159442901611, 1554948918.5204923], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [1024, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [1024, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 3552430, "e": [["tile_f", "sp", [32, 8, 4, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0003303043], 0, 9.292404174804688, 1554959345.3456247], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [1024, 512, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [1024, 512, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 1647687, "e": [["tile_f", "sp", [8, 8, 16, 1]], ["tile_y", "sp", [7, 2, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [128, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "c": null}], "r": [[0.0013268225999999998], 0, 5.178571462631226, 1554965249.3803976], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [256, 512, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [256, 512, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 2168137, "e": [["tile_f", "sp", [2, 4, 32, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 7, 2, 1]], ["tile_rc", "sp", [256, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.000460863], 0, 7.6866724491119385, 1554966621.4672701], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 8146507, "e": [["tile_f", "sp", [4, 8, 1, 4]], ["tile_y", "sp", [7, 2, 2, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [128, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0004175422], 0, 1.8580596446990967, 1554945646.8539002], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [512, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [512, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 14933965, "e": [["tile_f", "sp", [8, 4, 2, 8]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [32, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.000431368], 0, 6.44800329208374, 1554980848.7567935], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 6837970, "e": [["tile_f", "sp", [8, 2, 4, 8]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [128, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "c": null}], "r": [[0.0011816415], 0, 9.641122579574585, 1554953178.4609714], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"t": "winograd", "i": 519574, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [4, 1, 4, 8]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0004963299], 0, 12.552602767944336, 1554943327.7279453], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [128, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 9077604, "e": [["tile_f", "sp", [1, 1, 32, 4]], ["tile_y", "sp", [14, 1, 2, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.000269107], 0, 8.641182899475098, 1554969244.0934107], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [64, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [64, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 25451286, "e": [["tile_f", "sp", [1, 8, 4, 2]], ["tile_y", "sp", [14, 2, 1, 2]], ["tile_x", "sp", [2, 1, 28, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.00044651580000000003], 0, 8.235926151275635, 1554954272.9459844], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [256, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [256, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 32067807, "e": [["tile_f", "sp", [4, 16, 2, 2]], ["tile_y", "sp", [28, 1, 1, 2]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [16, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.00044522379999999996], 0, 8.113616228103638, 1554957196.4803026], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 20617002, "e": [["tile_f", "sp", [1, 8, 4, 2]], ["tile_y", "sp", [56, 1, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0001289671], 0, 4.256742000579834, 1554979811.828667], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 7, 7, "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 62969413, "e": [["tile_f", "sp", [1, 1, 8, 8]], ["tile_y", "sp", [28, 1, 1, 4]], ["tile_x", "sp", [4, 2, 14, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 7]], ["tile_rx", "sp", [1, 7]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0008444969], 0, 8.917776823043823, 1554974316.7219121], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 71826, "c": null, "e": [["tile_f", "sp", [16, 4, 16, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [64, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0005240697591623037], 0, 1.9879345893859863, 1538684133.1681948], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 21258, "c": null, "e": [["tile_f", "sp", [32, 2, 8, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.178158100381194e-05], 0, 1.0447304248809814, 1538685070.5457134], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [1024, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [1024, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 113016, "c": null, "e": [["tile_f", "sp", [16, 2, 32, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0003075978134556575], 0, 1.165560245513916, 1538686093.9553363], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 16299, "c": null, "e": [["tile_f", "sp", [128, 1, 4, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[3.744394313505425e-05], 0, 1.0555353164672852, 1538686880.8105361], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 761442, "c": null, "e": [["tile_f", "sp", [16, 8, 4, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [64, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0002860706657142857], 0, 1.1914925575256348, 1538688643.803028], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 312850, "c": null, "e": [["tile_f", "sp", [256, 1, 2, 1]], ["tile_y", "sp", [1, 1, 2, 7]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[4.69354909512761e-05], 0, 1.1330385208129883, 1538690229.7487793], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 705122, "c": null, "e": [["tile_f", "sp", [16, 8, 4, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00015319955436447167], 0, 1.1951820850372314, 1538691299.4314046], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 234144, "c": null, "e": [["tile_f", "sp", [128, 1, 2, 1]], ["tile_y", "sp", [1, 1, 7, 2]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[6.102415431348724e-05], 0, 1.0164296627044678, 1538692213.639821], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [256, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3280722, "c": null, "e": [["tile_f", "sp", [4, 8, 8, 1]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.000316804914556962], 0, 1.1764893531799316, 1538692968.977005], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1173975, "c": null, "e": [["tile_f", "sp", [256, 1, 1, 1]], ["tile_y", "sp", [1, 1, 4, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.466998284862043e-05], 0, 4.699853897094727, 1538693939.3165033], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5128722, "c": null, "e": [["tile_f", "sp", [4, 8, 8, 1]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0001820941309090909], 0, 4.953519344329834, 1538694726.737019], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 1043160, "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [2, 2, 7, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00010517174790794979], 0, 1.7660229206085205, 1538695922.5100384], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 3358080, "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [2, 1, 4, 7]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0001251102944099379], 0, 1.777782917022705, 1538698543.7329175], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 112, 112, "float32"], [64, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 2886240, "c": null, "e": [["tile_f", "sp", [64, 1, 1, 1]], ["tile_y", "sp", [7, 1, 4, 2]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0001965908774319066], 0, 1.0119473934173584, 1538701164.2750542], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [64, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 112, 112, "float32"], [64, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 56412935, "c": null, "e": [["tile_f", "sp", [1, 2, 16, 2]], ["tile_y", "sp", [28, 4, 1, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [2, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00018805118421052631], 0, 2.126875638961792, 1538702679.443884], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [32, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 32, 112, 112, "float32"], [32, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 4725392, "c": null, "e": [["tile_f", "sp", [32, 1, 1, 1]], ["tile_y", "sp", [4, 1, 4, 7]], ["tile_x", "sp", [2, 1, 56, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00012950566279069768], 0, 1.7964670658111572, 1538703580.0487945], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [32, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [32, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 16795901, "c": null, "e": [["tile_f", "sp", [1, 1, 8, 4]], ["tile_y", "sp", [28, 2, 1, 2]], ["tile_x", "sp", [2, 1, 56, 1]], ["tile_rc", "sp", [1, 3]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00015486618730650156], 0, 19.915329933166504, 1538704740.3396301], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 992, 7, 7], "float32"], ["TENSOR", [128, 992, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 992, 7, 7, "float32"], [128, 992, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 78023, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [62, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00012308115431348724], 0, 1.0024254322052002, 1538788111.7026997], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 960, 7, 7], "float32"], ["TENSOR", [128, 960, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 960, 7, 7, "float32"], [128, 960, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 22343, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [48, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[9.885210177865612e-05], 0, 1.075761079788208, 1538789049.4131649], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 928, 7, 7], "float32"], ["TENSOR", [128, 928, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 928, 7, 7, "float32"], [128, 928, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8903, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [58, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00011706999299065421], 0, 5.105271100997925, 1538789964.8743596], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 896, 7, 7], "float32"], ["TENSOR", [128, 896, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 896, 7, 7, "float32"], [128, 896, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 12743, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [56, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00011415141807909604], 0, 5.740638494491577, 1538790799.898342], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 864, 7, 7], "float32"], ["TENSOR", [128, 864, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 864, 7, 7, "float32"], [128, 864, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 20455, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [36, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[9.283645821854913e-05], 0, 1.0991449356079102, 1538792263.1661618], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 832, 7, 7], "float32"], ["TENSOR", [128, 832, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 832, 7, 7, "float32"], [128, 832, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 91463, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [52, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00010605849119170984], 0, 1.1531589031219482, 1538793276.2868276], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 800, 7, 7], "float32"], ["TENSOR", [128, 800, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 800, 7, 7, "float32"], [128, 800, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 14719, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [40, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[8.286428052805281e-05], 0, 10.84407353401184, 1538794516.506616], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 768, 7, 7], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 7, 7, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 16610, "c": null, "e": [["tile_f", "sp", [8, 2, 4, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 24]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[9.069063076923077e-05], 0, 1.1301770210266113, 1538796067.3274465], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 736, 7, 7], "float32"], ["TENSOR", [128, 736, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 736, 7, 7, "float32"], [128, 736, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 124107, "c": null, "e": [["tile_f", "sp", [8, 2, 4, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [46, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.65327084942085e-05], 0, 3.662139415740967, 1538796965.1628647], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 704, 7, 7], "float32"], ["TENSOR", [128, 704, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 704, 7, 7, "float32"], [128, 704, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 122339, "c": null, "e": [["tile_f", "sp", [2, 2, 16, 2]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [22, 32]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.34424737718568e-05], 0, 1.3078110218048096, 1538798665.5558236], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 672, 7, 7], "float32"], ["TENSOR", [128, 672, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 672, 7, 7, "float32"], [128, 672, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 24290, "c": null, "e": [["tile_f", "sp", [8, 2, 4, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [24, 28]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[7.355548569332356e-05], 0, 15.131038427352905, 1538800102.8032422], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 640, 7, 7], "float32"], ["TENSOR", [128, 640, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 640, 7, 7, "float32"], [128, 640, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 168258, "c": null, "e": [["tile_f", "sp", [4, 8, 4, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.260219464544139e-05], 0, 11.109286546707153, 1538801733.528239], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 608, 7, 7], "float32"], ["TENSOR", [128, 608, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 608, 7, 7, "float32"], [128, 608, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 31827, "c": null, "e": [["tile_f", "sp", [4, 2, 16, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [38, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[6.92368214532872e-05], 0, 5.3235862255096436, 1538802662.3141403], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 576, 7, 7], "float32"], ["TENSOR", [128, 576, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 576, 7, 7, "float32"], [128, 576, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 57028, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [36, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.943618764845606e-05], 0, 2.8046820163726807, 1538803707.6444323], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 544, 7, 7], "float32"], ["TENSOR", [128, 544, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 544, 7, 7, "float32"], [128, 544, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 32068, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [34, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.5462947339246124e-05], 0, 3.488588809967041, 1538804940.014018], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 7, 7], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 7, 7, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 85025, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [1, 4, 8, 1]], ["tile_x", "sp", [1, 2, 8, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[3.387354208754209e-05], 0, 2.096045732498169, 1538806335.7966588], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 47428, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[5.335046297282898e-05], 0, 7.114248037338257, 1538807356.784124], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1024, 14, 14], "float32"], ["TENSOR", [512, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 14, 14, "float32"], [512, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 2666034, "e": [["tile_f", "sp", [16, 4, 4, 2]], ["tile_y", "sp", [2, 1, 7, 1]], ["tile_x", "sp", [1, 7, 2, 1]], ["tile_rc", "sp", [128, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0006444008999999999], 0, 4.5529913902282715, 1554950358.2710702], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 992, 14, 14], "float32"], ["TENSOR", [128, 992, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 992, 14, 14, "float32"], [128, 992, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1613479, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [62, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00018629811131725419], 0, 1.412217378616333, 1538811130.1064463], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 960, 14, 14], "float32"], ["TENSOR", [128, 960, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 960, 14, 14, "float32"], [128, 960, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4624039, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [60, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00018082491335740073], 0, 1.389707326889038, 1538812582.4735014], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 928, 14, 14], "float32"], ["TENSOR", [128, 928, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 928, 14, 14, "float32"], [128, 928, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1613479, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [58, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00017594096335078532], 0, 32.868462324142456, 1538814037.147122], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 896, 14, 14], "float32"], ["TENSOR", [128, 896, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 896, 14, 14, "float32"], [128, 896, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2657959, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [56, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00016909465551839464], 0, 2.948920965194702, 1538815326.2985299], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 864, 14, 14], "float32"], ["TENSOR", [128, 864, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 864, 14, 14, "float32"], [128, 864, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1643783, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [108, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00017283390705679863], 0, 1.053091049194336, 1538816925.362273], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 832, 14, 14], "float32"], ["TENSOR", [128, 832, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 832, 14, 14, "float32"], [128, 832, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 599308, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [52, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.000165996498349835], 0, 1.1660890579223633, 1538818077.9467108], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 800, 14, 14], "float32"], ["TENSOR", [128, 800, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 800, 14, 14, "float32"], [128, 800, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2965159, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [50, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00015221640881458965], 0, 1.501213550567627, 1538820032.9254172], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 768, 14, 14], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 14, 14, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 783628, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [48, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00015289620182094084], 0, 1.3272161483764648, 1538821218.725461], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 736, 14, 14], "float32"], ["TENSOR", [128, 736, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 736, 14, 14, "float32"], [128, 736, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1982119, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [46, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0001415780463483146], 0, 1.4632749557495117, 1538822696.5618985], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 704, 14, 14], "float32"], ["TENSOR", [128, 704, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 704, 14, 14, "float32"], [128, 704, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1889959, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [44, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0001352428239247312], 0, 1.414778232574463, 1538824048.3295507], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 672, 14, 14], "float32"], ["TENSOR", [128, 672, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 672, 14, 14, "float32"], [128, 672, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3241639, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [42, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00012970178120978121], 0, 1.4736008644104004, 1538825535.8499055], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 640, 14, 14], "float32"], ["TENSOR", [128, 640, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 640, 14, 14, "float32"], [128, 640, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2657959, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [40, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0001242407141089109], 0, 3.6424829959869385, 1538827139.7220483], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 608, 14, 14], "float32"], ["TENSOR", [128, 608, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 608, 14, 14, "float32"], [128, 608, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 846263, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [2, 1, 1, 7]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [76, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.000124436], 0, 10.96009349822998, 1538828279.2371376], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 576, 14, 14], "float32"], ["TENSOR", [128, 576, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 576, 14, 14, "float32"], [128, 576, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3487399, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [36, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00011318031354983202], 0, 13.00906491279602, 1538829970.9019978], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 544, 14, 14], "float32"], ["TENSOR", [128, 544, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 544, 14, 14, "float32"], [128, 544, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1613479, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [34, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00010710978525641025], 0, 2.2806811332702637, 1538832060.504336], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [128, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [128, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1367695, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00010063262914572865], 0, 10.810802698135376, 1538833448.3748267], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 480, 14, 14], "float32"], ["TENSOR", [128, 480, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 480, 14, 14, "float32"], [128, 480, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1060108, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [30, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00010090847177419355], 0, 1.173581838607788, 1538834655.558986], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 448, 14, 14], "float32"], ["TENSOR", [128, 448, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 14, 14, "float32"], [128, 448, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2350759, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [28, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.046342356115108e-05], 0, 1.443748950958252, 1538836551.1019008], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 416, 14, 14], "float32"], ["TENSOR", [128, 416, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 416, 14, 14, "float32"], [128, 416, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2012839, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [26, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[8.418914297729183e-05], 0, 1.418027400970459, 1538838687.6621108], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 384, 14, 14], "float32"], ["TENSOR", [128, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 14, 14, "float32"], [128, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2197159, "c": null, "e": [["tile_f", "sp", [4, 1, 8, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [24, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.74511379044684e-05], 0, 1.4423730373382568, 1538840059.6083984], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 352, 14, 14], "float32"], ["TENSOR", [128, 352, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 352, 14, 14, "float32"], [128, 352, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 537868, "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [22, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[7.273062309368192e-05], 0, 14.969782829284668, 1538841445.5116246], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 320, 14, 14], "float32"], ["TENSOR", [128, 320, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 320, 14, 14, "float32"], [128, 320, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1859183, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [40, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[6.237981982543641e-05], 0, 16.40942907333374, 1538842541.3345072], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 288, 14, 14], "float32"], ["TENSOR", [128, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 14, 14, "float32"], [128, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2934411, "c": null, "e": [["tile_f", "sp", [4, 4, 4, 2]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [36, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[5.029475389251632e-05], 0, 1.5659475326538086, 1538843875.1409004], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 14, 14, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 21032, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [1, 4, 4, 2]], ["tile_x", "sp", [1, 1, 49, 1]], ["tile_rc", "sp", [4, 32]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00010605685370950888], 0, 2.8770177364349365, 1538845070.5571578], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 661098, "c": null, "e": [["tile_f", "sp", [4, 8, 4, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[4.281321889696452e-05], 0, 1.3150086402893066, 1538846421.29054], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [256, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [256, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 11142848, "e": [["tile_f", "sp", [4, 4, 8, 2]], ["tile_y", "sp", [4, 1, 7, 1]], ["tile_x", "sp", [2, 7, 2, 1]], ["tile_rc", "sp", [128, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0009459629], 0, 10.448927640914917, 1554972391.3655684], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 480, 28, 28], "float32"], ["TENSOR", [128, 480, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 480, 28, 28, "float32"], [128, 480, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 10422097, "c": null, "e": [["tile_f", "sp", [2, 1, 8, 8]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [60, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0002770921961325967], 0, 1.2362148761749268, 1538850912.3550613], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 448, 28, 28], "float32"], ["TENSOR", [128, 448, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 28, 28, "float32"], [128, 448, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6198097, "c": null, "e": [["tile_f", "sp", [2, 1, 8, 8]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [56, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00025754312082262214], 0, 1.3426780700683594, 1538852293.8814673], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 416, 28, 28], "float32"], ["TENSOR", [128, 416, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 416, 28, 28, "float32"], [128, 416, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2934097, "c": null, "e": [["tile_f", "sp", [2, 1, 8, 8]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [52, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0002442188317073171], 0, 2.747875928878784, 1538854025.399341], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 384, 28, 28], "float32"], ["TENSOR", [128, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 28, 28, "float32"], [128, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4086097, "c": null, "e": [["tile_f", "sp", [2, 1, 8, 8]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [48, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00022825630227272728], 0, 6.214647531509399, 1538855270.4081788], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 352, 28, 28], "float32"], ["TENSOR", [128, 352, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 352, 28, 28, "float32"], [128, 352, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5238097, "c": null, "e": [["tile_f", "sp", [2, 1, 8, 8]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [44, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00021150272151898732], 0, 2.7867863178253174, 1538856985.0964792], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 320, 28, 28], "float32"], ["TENSOR", [128, 320, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 320, 28, 28, "float32"], [128, 320, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6225984, "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [40, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00019690313976377952], 0, 1.1801960468292236, 1538858663.3631353], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 288, 28, 28], "float32"], ["TENSOR", [128, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 28, 28, "float32"], [128, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7926097, "c": null, "e": [["tile_f", "sp", [2, 1, 8, 8]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [36, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00018197597272727272], 0, 1.2835686206817627, 1538860130.2034986], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4113984, "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0001616460323101777], 0, 1.2255666255950928, 1538861855.9401083], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 224, 28, 28], "float32"], ["TENSOR", [128, 224, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 224, 28, 28, "float32"], [128, 224, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3157223, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [4, 1, 1, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [28, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00014706854852941176], 0, 1.8666982650756836, 1538863752.199898], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 28, 28], "float32"], ["TENSOR", [128, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 28, 28, "float32"], [128, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6421223, "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [4, 1, 1, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [24, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.000128685567394095], 0, 22.308583974838257, 1538865795.462426], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 160, 28, 28], "float32"], ["TENSOR", [128, 160, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 28, 28, "float32"], [128, 160, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3153984, "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00011284384026996625], 0, 17.547949075698853, 1538867716.5715816], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 152856, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [1, 4, 4, 2]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0001753623992994746], 0, 2.021149158477783, 1538869263.9188836], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3729984, "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[9.381299249530957e-05], 0, 1.9887633323669434, 1538871065.2399995], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [128, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [128, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 36378013, "e": [["tile_f", "sp", [2, 32, 2, 1]], ["tile_y", "sp", [28, 1, 1, 2]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0008156606], 0, 6.791729211807251, 1554976592.3804111], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 224, 56, 56], "float32"], ["TENSOR", [128, 224, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 224, 56, 56, "float32"], [128, 224, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 12509816, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 2]], ["tile_y", "sp", [28, 1, 2, 1]], ["tile_x", "sp", [1, 7, 8, 1]], ["tile_rc", "sp", [28, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0004905505902439024], 0, 1.1805377006530762, 1538874648.0994287], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 56, 56], "float32"], ["TENSOR", [128, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 56, 56, "float32"], [128, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 25565816, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 2]], ["tile_y", "sp", [28, 1, 2, 1]], ["tile_x", "sp", [1, 7, 8, 1]], ["tile_rc", "sp", [24, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00042907040170940173], 0, 4.26771354675293, 1538876153.6036224], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 160, 56, 56], "float32"], ["TENSOR", [128, 160, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 56, 56, "float32"], [128, 160, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 21725816, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 2]], ["tile_y", "sp", [28, 1, 2, 1]], ["tile_x", "sp", [1, 7, 8, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0003679489267399267], 0, 1.2252421379089355, 1538878120.6407452], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8669816, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 2]], ["tile_y", "sp", [28, 1, 2, 1]], ["tile_x", "sp", [1, 7, 8, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00030473472948328267], 0, 3.462869644165039, 1538879663.278558], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 96, 56, 56], "float32"], ["TENSOR", [128, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 56, 56, "float32"], [128, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 22493816, "c": null, "e": [["tile_f", "sp", [2, 4, 8, 2]], ["tile_y", "sp", [28, 1, 2, 1]], ["tile_x", "sp", [1, 7, 8, 1]], ["tile_rc", "sp", [12, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0002451529803921569], 0, 4.241109609603882, 1538881371.0287638], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [32, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [32, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 108068, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [1, 2, 4, 4]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "winograd"}], "r": [[0.0004426895309734513], 0, 2.5171635150909424, 1538882574.138559], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 18653840, "c": null, "e": [["tile_f", "sp", [2, 2, 8, 4]], ["tile_y", "sp", [28, 1, 2, 1]], ["tile_x", "sp", [1, 7, 8, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0001896496231060606], 0, 2.462634563446045, 1538884874.1319482], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"t": "winograd", "i": 410647, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [4, 2, 16, 4]], ["tile_x", "sp", [1, 4, 4, 1]], ["tile_rc", "sp", [64, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0006187037], 0, 8.767261505126953, 1554945239.17839], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 107671, "c": null, "e": [["tile_f", "sp", [4, 2, 32, 2]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.00011170939999999998], 0, 2.5888760089874268, 1555022071.900302], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 608610, "c": null, "e": [["tile_f", "sp", [8, 1, 32, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 1, 7]], ["tile_rc", "sp", [128, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005576546], 0, 2.253612995147705, 1555024034.1775575], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"t": "winograd", "i": 79987, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 1, 16, 8]], ["tile_x", "sp", [1, 7, 7, 1]], ["tile_rc", "sp", [32, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0004759948], 0, 8.792563199996948, 1554940745.0696976], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 1457647, "c": null, "e": [["tile_f", "sp", [2, 4, 32, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[9.957760000000001e-05], 0, 1.0042572021484375, 1555021138.3377924], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 7778627, "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [1, 2, 7, 1]], ["tile_x", "sp", [2, 7, 1, 1]], ["tile_rc", "sp", [128, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004268699], 0, 8.907147645950317, 1555019702.5898302], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 6028824, "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001096032], 0, 11.373619556427002, 1555025987.4431183], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 30965602, "c": null, "e": [["tile_f", "sp", [2, 1, 16, 4]], ["tile_y", "sp", [14, 1, 2, 1]], ["tile_x", "sp", [1, 7, 4, 1]], ["tile_rc", "sp", [64, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004450338], 0, 4.735571622848511, 1555023493.8178582], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"t": "winograd", "i": 321677, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 4, 4, 2]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0005076439], 0, 14.493378162384033, 1554942130.618302], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 93434, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [8, 1, 8, 8]], ["tile_x", "sp", [1, 7, 7, 1]], ["tile_rc", "sp", [128, 4]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0016002932380952382], 0, 1.2927846908569336, 1538706207.6221724], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1171807, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [4, 4, 16, 2]], ["tile_x", "sp", [2, 7, 14, 1]], ["tile_rc", "sp", [64, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.005323976947368421], 0, 3.491469383239746, 1538707562.3229945], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1062454, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [16, 4, 4, 2]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [32, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.002699300078947368], 0, 1.712977647781372, 1538710204.4787867], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 796847, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [8, 4, 4, 2]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [32, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.003582328821428571], 0, 30.718233823776245, 1538711290.5169294], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 301901, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [8, 2, 2, 8]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [32, 4]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "winograd"}], "r": [[0.002160468680851064], 0, 2.2771291732788086, 1538713514.750547], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1813400, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 2, 8, 4]], ["tile_x", "sp", [7, 7, 16, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0040044463200000005], 0, 12.513367891311646, 1538715498.4120855], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1309376, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [2, 4, 8, 2]], ["tile_x", "sp", [7, 7, 16, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.002707434108108108], 0, 13.540417194366455, 1538717126.7233148], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 2195328, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [1, 2, 4, 8]], ["tile_x", "sp", [49, 4, 16, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.005546605210526316], 0, 2.467291831970215, 1538719045.851881], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 161059560, "c": null, "e": [["tile_f", "sp", [1, 4, 4, 4]], ["tile_y", "sp", [28, 1, 1, 8]], ["tile_x", "sp", [14, 1, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0005955082426035502], 0, 1.6560072898864746, 1538721566.8455422], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [192, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [192, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3813136, "c": null, "e": [["tile_f", "sp", [6, 1, 8, 4]], ["tile_y", "sp", [1, 2, 4, 1]], ["tile_x", "sp", [2, 1, 4, 1]], ["tile_rc", "sp", [128, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00019577969667318982], 0, 16.34182333946228, 1538724300.7337055], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [448, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [448, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7045458, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 7]], ["tile_y", "sp", [1, 2, 4, 1]], ["tile_x", "sp", [1, 2, 4, 1]], ["tile_rc", "sp", [128, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00032905157565789474], 0, 1.4203288555145264, 1538725181.1495411], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [384, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [384, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7828109, "c": null, "e": [["tile_f", "sp", [12, 1, 8, 4]], ["tile_y", "sp", [1, 2, 4, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [128, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0003022847069486405], 0, 18.06054711341858, 1538726233.047221], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 2048, 8, 8], "float32"], ["TENSOR", [320, 2048, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 2048, 8, 8, "float32"], [320, 2048, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8705133, "c": null, "e": [["tile_f", "sp", [10, 4, 8, 1]], ["tile_y", "sp", [1, 2, 4, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [128, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00026277220997375327], 0, 1.9644010066986084, 1538727120.9285002], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [192, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [192, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8301678, "c": null, "e": [["tile_f", "sp", [6, 2, 8, 2]], ["tile_y", "sp", [2, 1, 4, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [64, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0001213984012121212], 0, 1.8440706729888916, 1538729278.8008165], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 448, 8, 8], "float32"], ["TENSOR", [384, 448, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 448, 8, 8, "float32"], [384, 448, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 215626, "c": null, "e": [["tile_b", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [1, 2, 192, 1]], ["tile_x", "sp", [1, 1, 1, 4]], ["tile_rc", "sp", [112, 4]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0006950035655172414], 0, 31.39168119430542, 1538730783.9491055], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [448, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [448, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5710194, "c": null, "e": [["tile_f", "sp", [8, 1, 8, 7]], ["tile_y", "sp", [1, 2, 2, 2]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [80, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00021037706289308176], 0, 1.2826430797576904, 1538732840.2017002], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 384, 8, 8], "float32"], ["TENSOR", [384, 384, 3, 1], "float32"], [1, 1], [1, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 8, 8, "float32"], [384, 384, 3, 1, "float32"], [1, 1], [1, 0], [1, 1], "NCHW", "float32"], {"i": 31961559, "c": null, "e": [["tile_f", "sp", [6, 1, 16, 4]], ["tile_y", "sp", [1, 4, 2, 1]], ["tile_x", "sp", [1, 1, 8, 1]], ["tile_rc", "sp", [32, 12]], ["tile_ry", "sp", [3, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00018740938764044942], 0, 1.6747934818267822, 1538734623.9567337], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 384, 8, 8], "float32"], ["TENSOR", [384, 384, 1, 3], "float32"], [1, 1], [0, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 8, 8, "float32"], [384, 384, 1, 3, "float32"], [1, 1], [0, 1], [1, 1], "NCHW", "float32"], {"i": 16468119, "c": null, "e": [["tile_f", "sp", [6, 1, 16, 4]], ["tile_y", "sp", [1, 2, 4, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [48, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00018865015442561206], 0, 1.2870540618896484, 1538735842.1039355], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [384, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [384, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1492011, "c": null, "e": [["tile_f", "sp", [6, 2, 16, 2]], ["tile_y", "sp", [1, 2, 4, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [64, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00018899474528301887], 0, 1.1503286361694336, 1538737294.7999964], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 1280, 8, 8], "float32"], ["TENSOR", [320, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 8, 8, "float32"], [320, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8302015, "c": null, "e": [["tile_f", "sp", [8, 2, 10, 2]], ["tile_y", "sp", [1, 2, 4, 1]], ["tile_x", "sp", [1, 1, 4, 2]], ["tile_rc", "sp", [64, 20]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00016675274875207986], 0, 9.518572330474854, 1538738244.635086], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 13428977, "c": null, "e": [["tile_f", "sp", [3, 4, 16, 1]], ["tile_y", "sp", [2, 1, 4, 1]], ["tile_x", "sp", [1, 1, 2, 4]], ["tile_rc", "sp", [96, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00018094568173598554], 0, 1.1965785026550293, 1538741184.7944498], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [320, 192, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [320, 192, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 13428971, "c": null, "e": [["tile_f", "sp", [5, 4, 16, 1]], ["tile_y", "sp", [2, 1, 4, 1]], ["tile_x", "sp", [1, 1, 2, 4]], ["tile_rc", "sp", [96, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00022400241741071428], 0, 6.386166572570801, 1538742171.4319954], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 783342, "c": null, "e": [["tile_f", "sp", [6, 2, 8, 2]], ["tile_y", "sp", [1, 1, 1, 17]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [24, 8]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0005600653463687151], 0, 1.6617372035980225, 1538744790.6790981], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 17, 17], "float32"], ["TENSOR", [192, 192, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 17, 17, "float32"], [192, 192, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 337140, "c": null, "e": [["tile_f", "sp", [4, 2, 12, 2]], ["tile_y", "sp", [1, 1, 1, 17]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.000562955595505618], 0, 13.148250341415405, 1538746218.8638897], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [192, 160, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [192, 160, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 669700, "c": null, "e": [["tile_f", "sp", [6, 4, 8, 1]], ["tile_y", "sp", [1, 17, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00044481565777777776], 0, 1.6090643405914307, 1538747566.9998116], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [160, 160, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [160, 160, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 321089, "c": null, "e": [["tile_f", "sp", [4, 2, 10, 2]], ["tile_y", "sp", [1, 17, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 10]], ["tile_ry", "sp", [1, 7]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00039756966666666666], 0, 1.2615001201629639, 1538748741.3635602], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [192, 160, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [192, 160, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 670446, "c": null, "e": [["tile_f", "sp", [6, 2, 8, 2]], ["tile_y", "sp", [1, 1, 1, 17]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [20, 8]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0004707163849765258], 0, 1.5564532279968262, 1538750318.50236], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 160, 17, 17], "float32"], ["TENSOR", [160, 160, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 17, 17, "float32"], [160, 160, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 321089, "c": null, "e": [["tile_f", "sp", [4, 2, 10, 2]], ["tile_y", "sp", [1, 17, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 7]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00039829519444444444], 0, 1.8980224132537842, 1538751516.9735677], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [160, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [160, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 342982, "c": null, "e": [["tile_f", "sp", [5, 4, 8, 1]], ["tile_y", "sp", [1, 1, 1, 17]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [96, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0003170707721518987], 0, 11.788979053497314, 1538752849.427508], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [192, 128, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [192, 128, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 449284, "c": null, "e": [["tile_f", "sp", [6, 4, 8, 1]], ["tile_y", "sp", [1, 17, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0003626276859205776], 0, 1.572218656539917, 1538753895.1277938], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [128, 128, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [128, 128, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 160735, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [1, 1, 1, 17]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00031408786249999997], 0, 7.100447654724121, 1538755100.678874], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [192, 128, 7, 1], "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [192, 128, 7, 1, "float32"], [1, 1], [3, 0], [1, 1], "NCHW", "float32"], {"i": 450030, "c": null, "e": [["tile_f", "sp", [6, 2, 8, 2]], ["tile_y", "sp", [1, 1, 1, 17]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00038391145977011496], 0, 4.594708442687988, 1538756344.7816327], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 17, 17], "float32"], ["TENSOR", [128, 128, 1, 7], "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 17, 17, "float32"], [128, 128, 1, 7, "float32"], [1, 1], [0, 3], [1, 1], "NCHW", "float32"], {"i": 160495, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [1, 17, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00029028242363112394], 0, 1.5227651596069336, 1538757551.6295998], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [128, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [128, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 45535, "c": null, "e": [["tile_f", "sp", [4, 2, 8, 2]], ["tile_y", "sp", [1, 1, 1, 17]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [96, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0003036941212121212], 0, 1.1823575496673584, 1538758601.7473073], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 768, 17, 17], "float32"], ["TENSOR", [192, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 17, 17, "float32"], [192, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 132180, "c": null, "e": [["tile_f", "sp", [4, 2, 12, 2]], ["tile_y", "sp", [1, 17, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [64, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0003279414786885246], 0, 1.2324786186218262, 1538759576.7236032], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 96, 35, 35], "float32"], ["TENSOR", [96, 96, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 35, 35, "float32"], [96, 96, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 392487, "c": null, "e": [["tile_f", "sp", [2, 8, 6, 1]], ["tile_y", "sp", [17, 1, 1, 1]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [48, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00047008414084507037], 0, 21.658472061157227, 1538761347.4519036], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [384, 288, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [384, 288, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2079038, "c": null, "e": [["tile_f", "sp", [12, 2, 8, 2]], ["tile_y", "sp", [1, 1, 1, 17]], ["tile_x", "sp", [1, 1, 17, 1]], ["tile_rc", "sp", [288, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0030419013636363637], 0, 2.1014552116394043, 1538762292.0791416], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [48, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [48, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 161489, "c": null, "e": [["tile_f", "sp", [1, 4, 6, 2]], ["tile_y", "sp", [7, 5, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [48, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00013624694277929156], 0, 1.8275806903839111, 1538763931.0236416], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 288, 35, 35], "float32"], ["TENSOR", [64, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 35, 35, "float32"], [64, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 900837, "c": null, "e": [["tile_f", "sp", [1, 8, 8, 1]], ["tile_y", "sp", [7, 1, 5, 1]], ["tile_x", "sp", [1, 1, 5, 7]], ["tile_rc", "sp", [36, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.0002305771013824885], 0, 1.228672742843628, 1538765047.988771], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 35, 35], "float32"], ["TENSOR", [48, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 35, 35, "float32"], [48, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1739571, "c": null, "e": [["tile_f", "sp", [1, 3, 8, 2]], ["tile_y", "sp", [7, 1, 1, 5]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.00015134447734138973], 0, 10.06118893623352, 1538766294.2085798], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 35, 35], "float32"], ["TENSOR", [64, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 35, 35, "float32"], [64, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 248093, "c": null, "e": [["tile_f", "sp", [2, 4, 4, 2]], ["tile_y", "sp", [7, 1, 1, 5]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00019819382411067195], 0, 5.968884706497192, 1538767651.9267356], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [32, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [32, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 853510, "c": null, "e": [["tile_f", "sp", [1, 8, 4, 1]], ["tile_y", "sp", [7, 1, 1, 5]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [48, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[7.956719666931006e-05], 0, 10.136065006256104, 1538768634.9824493], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 96, 35, 35], "float32"], ["TENSOR", [96, 96, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 35, 35, "float32"], [96, 96, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 4247211, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 1, 8, 6]], ["tile_x", "sp", [9, 3, 12, 1]], ["tile_rc", "sp", [8, 12]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.00050543504], 0, 1.2918884754180908, 1538772108.8917615], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 35, 35], "float32"], ["TENSOR", [96, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 35, 35, "float32"], [96, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 2526009, "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 2, 3, 8]], ["tile_x", "sp", [3, 3, 36, 1]], ["tile_rc", "sp", [4, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]], "t": "winograd"}], "r": [[0.0003526407711267606], 0, 2.206836223602295, 1538773225.934845], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 48, 35, 35], "float32"], ["TENSOR", [64, 48, 5, 5], "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 48, 35, 35, "float32"], [64, 48, 5, 5, "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {"i": 4965517, "c": null, "e": [["tile_f", "sp", [2, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 5]], ["tile_x", "sp", [1, 1, 5, 7]], ["tile_rc", "sp", [48, 1]], ["tile_ry", "sp", [1, 5]], ["tile_rx", "sp", [1, 5]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0005109715357142857], 0, 2.0362069606781006, 1538775383.4009142], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [48, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [48, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1666769, "c": null, "e": [["tile_f", "sp", [1, 4, 6, 2]], ["tile_y", "sp", [7, 5, 1, 1]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [32, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[9.65264927815207e-05], 0, 1.7492220401763916, 1538776552.4567373], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 35, 35], "float32"], ["TENSOR", [64, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 35, 35, "float32"], [64, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 76036, "c": null, "e": [["tile_f", "sp", [2, 8, 4, 1]], ["tile_y", "sp", [7, 1, 1, 5]], ["tile_x", "sp", [1, 1, 35, 1]], ["tile_rc", "sp", [48, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00015126252416918428], 0, 12.649672508239746, 1538777934.6820767], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 80, 73, 73], "float32"], ["TENSOR", [192, 80, 3, 3], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 80, 73, 73, "float32"], [192, 80, 3, 3, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1240584, "c": null, "e": [["tile_f", "sp", [6, 1, 32, 1]], ["tile_y", "sp", [71, 1, 1, 1]], ["tile_x", "sp", [1, 1, 1, 71]], ["tile_rc", "sp", [80, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.004486951652173913], 0, 1.9496736526489258, 1538779494.6178966], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 73, 73], "float32"], ["TENSOR", [80, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 73, 73, "float32"], [80, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 34727, "c": null, "e": [["tile_f", "sp", [4, 20, 1, 1]], ["tile_y", "sp", [73, 1, 1, 1]], ["tile_x", "sp", [1, 1, 73, 1]], ["tile_rc", "sp", [32, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.00042778770638297875], 0, 4.8698039054870605, 1538782207.6844997], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 32, 147, 147], "float32"], ["TENSOR", [64, 32, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 147, 147, "float32"], [64, 32, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 15395544, "c": null, "e": [["tile_f", "sp", [1, 4, 16, 1]], ["tile_y", "sp", [147, 1, 1, 1]], ["tile_x", "sp", [1, 7, 7, 3]], ["tile_rc", "sp", [32, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0026279461282051283], 0, 1.4585211277008057, 1538783344.04221], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 32, 149, 149], "float32"], ["TENSOR", [32, 32, 3, 3], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 149, 149, "float32"], [32, 32, 3, 3, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5944096, "c": null, "e": [["tile_f", "sp", [1, 4, 4, 2]], ["tile_y", "sp", [21, 1, 1, 7]], ["tile_x", "sp", [7, 1, 21, 1]], ["tile_rc", "sp", [32, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]], "t": "direct"}], "r": [[0.001143458465909091], 0, 1.274094581604004, 1538784938.3402982], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 299, 299], "float32"], ["TENSOR", [32, 3, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 299, 299, "float32"], [32, 3, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 29168, "c": null, "e": [["tile_f", "sp", [1, 4, 1, 8]], ["tile_y", "sp", [149, 1, 1, 1]], ["tile_x", "sp", [1, 1, 149, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [3, 1]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "t": "direct"}], "r": [[0.0003318904620462046], 0, 1.3958790302276611, 1538786276.7700129], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [256, 256, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 8750147, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [1, 2, 7, 1]], ["tile_x", "sp", [2, 7, 1, 1]], ["tile_rc", "sp", [256, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0007959011], 0, 5.501811265945435, 1554974881.5541902], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 675490, "e": [["tile_f", "sp", [8, 1, 32, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 1, 7]], ["tile_rc", "sp", [256, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0010813246], 0, 2.48030948638916, 1554957441.953887], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [128, 128, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"t": "direct", "i": 35599042, "e": [["tile_f", "sp", [2, 1, 16, 4]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [64, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]], "c": null}], "r": [[0.0008413876000000001], 0, 1.2982349395751953, 1554963324.6987908], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 28, 28], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 28, 28, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 145295, "t": "winograd", "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 2, 4, 4]], ["tile_x", "sp", [1, 7, 28, 1]], ["tile_rc", "sp", [8, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.000168618], 0, 3.932257890701294, 1563836919.5864396], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 14, 14, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 23379, "t": "winograd", "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [1, 1, 16, 8]], ["tile_x", "sp", [1, 7, 7, 1]], ["tile_rc", "sp", [16, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0001554413], 0, 1.4906566143035889, 1563838298.3211298], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 7, 7], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 7, 7, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 275347, "t": "winograd", "c": null, "e": [["tile_b", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [2, 1, 16, 8]], ["tile_x", "sp", [1, 2, 4, 2]], ["tile_rc", "sp", [64, 4]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00017506920000000001], 0, 8.431310415267944, 1563840461.2246528], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 7, 7], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 7, 7, "float32"], [512, 256, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 3815935, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 2, 16, 1]], ["tile_y", "sp", [1, 1, 4, 1]], ["tile_x", "sp", [1, 2, 2, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002938177], 0, 4.090831995010376, 1563843650.1674786], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 15, 15], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 15, 15, "float32"], [256, 128, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 408401, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 4, 8, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.00015906550000000002], 0, 5.028463840484619, 1563846318.4201725], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 29, 29], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 29, 29, "float32"], [128, 64, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4129703, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 4, 8, 1]], ["tile_y", "sp", [1, 1, 2, 7]], ["tile_x", "sp", [2, 1, 7, 1]], ["tile_rc", "sp", [32, 2]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.00011363549999999999], 0, 4.890873670578003, 1563848403.2441373], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 57, 57], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 57, 57, "float32"], [64, 64, 3, 3, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 21695845, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 8, 8]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [64, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00020082810000000002], 0, 12.43902039527893, 1563851241.5179203], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 229, 229], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 229, 229, "float32"], [64, 3, 7, 7, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 72884639, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 2, 16, 1]], ["tile_y", "sp", [8, 2, 1, 7]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [7, 1]], ["tile_rx", "sp", [1, 7]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0007047524], 0, 20.353204488754272, 1563854025.0433474], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3956302, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 2, 8]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [32, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[5.43937e-05], 0, 4.9692909717559814, 1563857225.8397942], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 28, 28], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 28, 28, "float32"], [128, 64, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 937157, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 4, 4, 4]], ["tile_y", "sp", [7, 2, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[2.9148899999999998e-05], 0, 7.307832479476929, 1563862737.3738387], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 14, 14, "float32"], [256, 128, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 30887, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[3.7923299999999997e-05], 0, 10.342200517654419, 1563866904.3233705], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 7, 7], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 7, 7, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1089916, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 4, 16, 1]], ["tile_y", "sp", [1, 2, 2, 1]], ["tile_x", "sp", [1, 1, 4, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[3.8659300000000004e-05], 0, 12.390298128128052, 1563871214.8534417], "v": 0.1} {"i": ["cuda -model=tx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 4, 4], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 4, 4, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1603405, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 1, 64, 1]], ["tile_y", "sp", [1, 1, 2, 2]], ["tile_x", "sp", [1, 1, 2, 2]], ["tile_rc", "sp", [128, 4]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004848529], 0, 10.855116605758667, 1563873696.3061104], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [16, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [16, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 23510223, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 4, 2]], ["tile_y", "sp", [112, 1, 1, 1]], ["tile_x", "sp", [1, 7, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.00017527783194549583], 0, 2.204166889190674, 1563213049.8776293], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 16, 112, 112], "float32"], ["TENSOR", [16, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 16, 112, 112, "float32"], [16, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 3639370, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [4, 1, 4, 7]], ["tile_x", "sp", [2, 1, 56, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00014197828991596638], 0, 2.6736292839050293, 1563215568.6790938], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 16, 112, 112], "float32"], ["TENSOR", [32, 16, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 16, 112, 112, "float32"], [32, 16, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 9075125, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 8, 4]], ["tile_y", "sp", [28, 1, 1, 4]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [2, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.00014621892543859648], 0, 7.110719442367554, 1563217612.1554525], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [32, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 32, 112, 112, "float32"], [32, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 1566208, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 1, 1, 1]], ["tile_y", "sp", [14, 1, 1, 4]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.00013765299593165175], 0, 1.7799158096313477, 1563219842.1631362], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 32, 56, 56], "float32"], ["TENSOR", [64, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 56, 56, "float32"], [64, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7179501, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 8, 8, 1]], ["tile_y", "sp", [28, 1, 1, 2]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [16, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00012920252442448062], 0, 2.753791570663452, 1563221945.6818686], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 56, 56, "float32"], [64, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 2881788, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 1, 1]], ["tile_y", "sp", [7, 1, 1, 8]], ["tile_x", "sp", [1, 2, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.000134341464549483], 0, 4.2877209186553955, 1563223804.7331185], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 20086176, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 32, 2, 1]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [16, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.000261794712345679], 0, 6.304214954376221, 1563225307.5526948], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 56, 56, "float32"], [64, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 596988, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 1, 1]], ["tile_y", "sp", [7, 1, 1, 4]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[7.142606261453879e-05], 0, 4.008775472640991, 1563226850.7057345], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 64, 28, 28], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 28, 28, "float32"], [128, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7381256, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 4, 8, 2]], ["tile_y", "sp", [4, 1, 1, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00013018478359096312], 0, 6.441323518753052, 1563230309.5303924], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 28, 28, "float32"], [128, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1045800, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [1, 1, 4, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[7.951574353049908e-05], 0, 6.855978012084961, 1563230407.5483391], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6797064, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [7, 4, 1, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.00023741288293650795], 0, 3.481106996536255, 1563231947.1887934], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 28, 28, "float32"], [128, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 170400, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [2, 1, 1, 7]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[6.420577136842105e-05], 0, 4.719137668609619, 1563233447.8050773], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 14, 14, "float32"], [256, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1499877, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 8, 8, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.00011502722228412257], 0, 3.17366099357605, 1563234338.6279411], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 14, 14, "float32"], [256, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 234639, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 1, 2, 1]], ["tile_y", "sp", [1, 1, 2, 7]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[5.109003548766158e-05], 0, 3.491395950317383, 1563235585.1519742], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2048997, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 8, 8, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00021116841943419435], 0, 3.184243679046631, 1563236545.8893328], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 14, 14, "float32"], [256, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 12255, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 1, 1, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[3.924678319327731e-05], 0, 4.53099250793457, 1563237927.7458644], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 256, 7, 7], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 7, 7, "float32"], [512, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 171021, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 4, 8, 2]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00014150062778730703], 0, 2.195864200592041, 1563239033.0780084], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 7, 7, "float32"], [512, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 16380, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 1, 1, 4]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[3.686833818628311e-05], 0, 8.514827013015747, 1563240033.4828553], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 149936, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 2, 4, 4]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [128, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002714924693572496], 0, 2.5949909687042236, 1563241534.4053085], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [8, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [8, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 15000798, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 2, 4]], ["tile_y", "sp", [14, 1, 4, 2]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.00015482867866108786], 0, 5.705176115036011, 1563243285.3283496], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 8, 112, 112], "float32"], ["TENSOR", [8, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 8, 112, 112, "float32"], [8, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1687640, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 1, 1, 1]], ["tile_y", "sp", [4, 1, 4, 7]], ["tile_x", "sp", [2, 1, 56, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[7.363857718337092e-05], 0, 6.139702796936035, 1563244917.5932703], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 8, 112, 112], "float32"], ["TENSOR", [16, 8, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 8, 112, 112, "float32"], [16, 8, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1553986, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 2, 2]], ["tile_y", "sp", [14, 2, 4, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [2, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[6.994848899266178e-05], 0, 5.869164228439331, 1563247272.2537227], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 16, 112, 112], "float32"], ["TENSOR", [16, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 16, 112, 112, "float32"], [16, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 978880, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 1, 1, 1]], ["tile_y", "sp", [14, 1, 1, 4]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[7.097581105990784e-05], 0, 7.02120566368103, 1563248498.5865142], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 16, 56, 56], "float32"], ["TENSOR", [32, 16, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 16, 56, 56, "float32"], [32, 16, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8368672, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 4, 2]], ["tile_y", "sp", [56, 1, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [2, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[4.545824338362602e-05], 0, 3.513225793838501, 1563249717.364269], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 32, 56, 56], "float32"], ["TENSOR", [32, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 32, 56, 56, "float32"], [32, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1921192, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 1, 1, 1]], ["tile_y", "sp", [7, 1, 1, 8]], ["tile_x", "sp", [1, 2, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[6.85644796465762e-05], 0, 2.307628631591797, 1563251546.7273664], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 32, 56, 56], "float32"], ["TENSOR", [32, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 56, 56, "float32"], [32, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4780323, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 4, 8]], ["tile_y", "sp", [14, 4, 1, 1]], ["tile_x", "sp", [2, 1, 28, 1]], ["tile_rc", "sp", [16, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[7.313684555266096e-05], 0, 2.7107584476470947, 1563253964.0875075], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 32, 56, 56], "float32"], ["TENSOR", [32, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 32, 56, 56, "float32"], [32, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 396592, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 1, 1, 1]], ["tile_y", "sp", [7, 4, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[3.8380322149076236e-05], 0, 7.0753419399261475, 1563255567.5494761], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 32, 28, 28], "float32"], ["TENSOR", [64, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 28, 28, "float32"], [64, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2075660, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 4, 8, 1]], ["tile_y", "sp", [4, 1, 1, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [4, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[3.906622786281728e-05], 0, 5.226442098617554, 1563257735.7392588], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 28, 28], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 28, 28, "float32"], [64, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 597660, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 1, 1]], ["tile_y", "sp", [1, 1, 4, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[4.32418401675485e-05], 0, 2.5058066844940186, 1563259256.8189473], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 64, 28, 28], "float32"], ["TENSOR", [64, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 28, 28, "float32"], [64, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5166885, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 8, 2]], ["tile_y", "sp", [4, 1, 1, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[6.645289351716045e-05], 0, 8.765309572219849, 1563260661.173324], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 28, 28], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 28, 28, "float32"], [64, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 117852, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 1, 1]], ["tile_y", "sp", [1, 1, 7, 2]], ["tile_x", "sp", [1, 2, 7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[3.334162761830877e-05], 0, 3.8770089149475098, 1563262227.5402756], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 64, 14, 14], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 14, 14, "float32"], [128, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1214064, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [4, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[2.9981879765836085e-05], 0, 5.832796335220337, 1563263674.5490437], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 14, 14, "float32"], [128, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 169568, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 2, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[2.5260917921878988e-05], 0, 2.6164073944091797, 1563265115.1844757], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 14, 14, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1367664, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [8, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[5.561408802698145e-05], 0, 3.6678686141967773, 1563266752.8962743], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 14, 14], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 14, 14, "float32"], [128, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 8890, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 4, 2, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[2.5040166886543536e-05], 0, 8.285832643508911, 1563268376.0363626], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 128, 7, 7], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 7, 7, "float32"], [256, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 118007, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [8, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[4.540631451951322e-05], 0, 3.7682087421417236, 1563269378.0598779], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 7, 7], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 7, 7, "float32"], [256, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 4371, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 1, 4]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 0]]}], "r": [[2.5135566276457372e-05], 0, 2.471785068511963, 1563270616.4319098], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 256, 7, 7], "float32"], ["TENSOR", [256, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 7, 7, "float32"], [256, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 130877, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[8.43837738867272e-05], 0, 6.163830280303955, 1563271697.5708985], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [32, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [32, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 7991553, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 8, 1]], ["tile_y", "sp", [28, 2, 1, 2]], ["tile_x", "sp", [4, 1, 28, 1]], ["tile_rc", "sp", [1, 3]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0007463850999999999], 0, 1.133371353149414, 1563299201.6934328], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [32, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 32, 112, 112, "float32"], [32, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 5985286, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 1, 2, 1]], ["tile_y", "sp", [7, 2, 4, 2]], ["tile_x", "sp", [8, 1, 7, 2]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0007699989], 0, 1.2738666534423828, 1563300323.0430667], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [64, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 112, 112, "float32"], [64, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 11960046, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 8, 4, 2]], ["tile_y", "sp", [56, 2, 1, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [16, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0007746248], 0, 1.1300718784332275, 1563302346.8218687], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 112, 112, "float32"], [64, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 2856672, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 1, 1]], ["tile_y", "sp", [28, 1, 2, 1]], ["tile_x", "sp", [2, 2, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.000403757], 0, 0.9652185440063477, 1563303863.5485005], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 13201824, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 8, 8, 1]], ["tile_y", "sp", [7, 2, 4, 1]], ["tile_x", "sp", [7, 2, 4, 1]], ["tile_rc", "sp", [8, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0012320707999999999], 0, 2.6889166831970215, 1563305439.9452949], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 4100520, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [4, 7, 2, 1]], ["tile_x", "sp", [2, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0006930062], 0, 4.543447256088257, 1563307549.5120738], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 27120148, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 4, 16, 1]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [2, 2, 14, 1]], ["tile_rc", "sp", [16, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0013176191], 0, 1.15529203414917, 1563308468.9836693], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 1043760, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 1, 1, 1]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00020384249999999999], 0, 1.0619704723358154, 1563309303.7050557], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8754337, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 8, 4, 4]], ["tile_y", "sp", [1, 2, 14, 1]], ["tile_x", "sp", [14, 1, 2, 1]], ["tile_rc", "sp", [64, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.001088782], 0, 1.3260838985443115, 1563310476.2915967], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 880110, "t": "direct", "c": null, "e": [["tile_f", "sp", [256, 1, 1, 1]], ["tile_y", "sp", [1, 4, 7, 1]], ["tile_x", "sp", [2, 2, 7, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003159079], 0, 1.136427879333496, 1563312339.3294115], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [256, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 10131527, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 4, 16, 1]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [2, 1, 14, 1]], ["tile_rc", "sp", [64, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0011632403], 0, 1.201418161392212, 1563314122.7430716], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 233145, "t": "direct", "c": null, "e": [["tile_f", "sp", [256, 1, 1, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003334068], 0, 4.2746782302856445, 1563315123.1666262], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1549481, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 4, 4, 1]], ["tile_y", "sp", [1, 14, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [256, 1]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0008359695], 0, 1.079960584640503, 1563315483.7015028], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 87350, "t": "direct", "c": null, "e": [["tile_f", "sp", [256, 1, 2, 1]], ["tile_y", "sp", [1, 2, 1, 7]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 0]]}], "r": [[0.0002782611], 0, 1.3183612823486328, 1563316697.7188144], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 141269, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 4, 8, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [128, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.001183149], 0, 1.1247057914733887, 1563317552.7005713], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 12771, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 2, 2, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002692127], 0, 1.5148050785064697, 1563318778.5047276], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [1024, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [1024, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 149625, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 4, 32, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [128, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0009749307000000001], 0, 1.1249299049377441, 1563319620.504779], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 7195, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 1, 32, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 0]]}], "r": [[0.0003422933], 0, 3.0528624057769775, 1563320553.2704954], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 263545, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 2, 4, 4]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [256, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0017210970999999998], 0, 1.3365283012390137, 1563321326.9945395], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [24, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [24, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 72880741, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 6, 4, 1]], ["tile_y", "sp", [14, 1, 8, 1]], ["tile_x", "sp", [7, 1, 8, 2]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0008261574], 0, 3.494765281677246, 1563322633.3956487], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 24, 112, 112], "float32"], ["TENSOR", [24, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 24, 112, 112, "float32"], [24, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 8296640, "t": "direct", "c": null, "e": [["tile_f", "sp", [24, 1, 1, 1]], ["tile_y", "sp", [14, 1, 1, 8]], ["tile_x", "sp", [2, 2, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00023585889999999999], 0, 1.3065319061279297, 1563325401.129285], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 24, 112, 112], "float32"], ["TENSOR", [48, 24, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 24, 112, 112, "float32"], [48, 24, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 94707335, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 3, 16]], ["tile_y", "sp", [112, 1, 1, 1]], ["tile_x", "sp", [2, 1, 28, 2]], ["tile_rc", "sp", [8, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005658391], 0, 1.0900371074676514, 1563327476.6003084], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 48, 112, 112], "float32"], ["TENSOR", [48, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 48, 112, 112, "float32"], [48, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 4796725, "t": "direct", "c": null, "e": [["tile_f", "sp", [24, 1, 1, 2]], ["tile_y", "sp", [7, 1, 8, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0007296344], 0, 2.885833501815796, 1563327994.332088], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 48, 56, 56], "float32"], ["TENSOR", [96, 48, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 48, 56, 56, "float32"], [96, 48, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1953692, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 3, 4, 8]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [24, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004161021], 0, 1.3280529975891113, 1563329754.5648842], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 96, 56, 56], "float32"], ["TENSOR", [96, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 96, 56, 56, "float32"], [96, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 6267969, "t": "direct", "c": null, "e": [["tile_f", "sp", [48, 2, 1, 1]], ["tile_y", "sp", [2, 2, 2, 7]], ["tile_x", "sp", [1, 1, 56, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005678447], 0, 1.660996675491333, 1563331322.8285992], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 96, 56, 56], "float32"], ["TENSOR", [96, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 56, 56, "float32"], [96, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 54872063, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 16, 3]], ["tile_y", "sp", [7, 8, 1, 1]], ["tile_x", "sp", [7, 1, 8, 1]], ["tile_rc", "sp", [32, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0008999448], 0, 1.1297895908355713, 1563333163.5811095], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 96, 56, 56], "float32"], ["TENSOR", [96, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 96, 56, 56, "float32"], [96, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 1950816, "t": "direct", "c": null, "e": [["tile_f", "sp", [96, 1, 1, 1]], ["tile_y", "sp", [1, 1, 7, 4]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004517697], 0, 4.714244365692139, 1563335585.493781], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 96, 28, 28], "float32"], ["TENSOR", [192, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 28, 28, "float32"], [192, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 27082671, "t": "direct", "c": null, "e": [["tile_f", "sp", [6, 2, 16, 1]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [2, 1, 14, 1]], ["tile_rc", "sp", [32, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005378599], 0, 1.1491811275482178, 1563336455.0089135], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 192, 28, 28], "float32"], ["TENSOR", [192, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 192, 28, 28, "float32"], [192, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 236628, "t": "direct", "c": null, "e": [["tile_f", "sp", [96, 1, 1, 2]], ["tile_y", "sp", [2, 1, 7, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0003567415], 0, 4.027453184127808, 1563338936.539752], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 192, 28, 28], "float32"], ["TENSOR", [192, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 28, 28, "float32"], [192, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 15688312, "t": "direct", "c": null, "e": [["tile_f", "sp", [3, 2, 16, 2]], ["tile_y", "sp", [1, 7, 4, 1]], ["tile_x", "sp", [7, 2, 2, 1]], ["tile_rc", "sp", [96, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0008143236], 0, 1.063072919845581, 1563339271.6948323], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 192, 28, 28], "float32"], ["TENSOR", [192, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 192, 28, 28, "float32"], [192, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 474586, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 1, 2, 3]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003871843], 0, 2.9838969707489014, 1563340406.8597207], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 192, 14, 14], "float32"], ["TENSOR", [384, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 14, 14, "float32"], [384, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3629926, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 2, 12, 2]], ["tile_y", "sp", [1, 7, 1, 2]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [96, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0006809956], 0, 1.8068442344665527, 1563341402.0820334], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 384, 14, 14], "float32"], ["TENSOR", [384, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 384, 14, 14, "float32"], [384, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 309600, "t": "direct", "c": null, "e": [["tile_f", "sp", [384, 1, 1, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00024057750000000002], 0, 1.321399450302124, 1563342711.801517], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 384, 14, 14], "float32"], ["TENSOR", [384, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 14, 14, "float32"], [384, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4119089, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 4, 4, 3]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [192, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0011110484], 0, 4.10141396522522, 1563344612.7624166], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 384, 14, 14], "float32"], ["TENSOR", [384, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 384, 14, 14, "float32"], [384, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 3926, "t": "direct", "c": null, "e": [["tile_f", "sp", [12, 1, 32, 1]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0005201727], 0, 0.996361494064331, 1563345182.6044848], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 384, 7, 7], "float32"], ["TENSOR", [768, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 7, 7, "float32"], [768, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 905070, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 3, 16, 2]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [48, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0007131347], 0, 2.2746970653533936, 1563345738.851489], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 768, 7, 7], "float32"], ["TENSOR", [768, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 768, 7, 7, "float32"], [768, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 49311, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 1, 12]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003563196], 0, 1.5384790897369385, 1563347051.2535226], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 768, 7, 7], "float32"], ["TENSOR", [768, 768, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 768, 7, 7, "float32"], [768, 768, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 998926, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 2, 12, 4]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [128, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0006935974], 0, 1.6845948696136475, 1563348340.7838874], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [8, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [8, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 11096763, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 8, 1, 1]], ["tile_y", "sp", [28, 1, 4, 1]], ["tile_x", "sp", [1, 2, 56, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [3, 1]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004199611], 0, 1.3166685104370117, 1563472213.2593744], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 8, 112, 112], "float32"], ["TENSOR", [8, 8, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 8, 112, 112, "float32"], [8, 8, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 907223, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 8, 1, 1]], ["tile_y", "sp", [56, 2, 1, 1]], ["tile_x", "sp", [1, 1, 112, 1]], ["tile_rc", "sp", [2, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0003327833], 0, 5.912122964859009, 1563474090.380373], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 8, 112, 112], "float32"], ["TENSOR", [8, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 8, 112, 112, "float32"], [8, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 2169540, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 1, 1, 1]], ["tile_y", "sp", [8, 1, 1, 14]], ["tile_x", "sp", [1, 1, 56, 2]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00041971179999999997], 0, 6.047390460968018, 1563475336.0940797], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 8, 112, 112], "float32"], ["TENSOR", [4, 8, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 8, 112, 112, "float32"], [4, 8, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3981628, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 2, 2]], ["tile_y", "sp", [28, 4, 1, 1]], ["tile_x", "sp", [1, 1, 112, 1]], ["tile_rc", "sp", [8, 1]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003116523], 0, 3.4420485496520996, 1563476529.1148903], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 4, 112, 112], "float32"], ["TENSOR", [24, 4, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 4, 112, 112, "float32"], [24, 4, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 22448477, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 1, 12]], ["tile_y", "sp", [56, 1, 1, 2]], ["tile_x", "sp", [1, 1, 112, 1]], ["tile_rc", "sp", [1, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003017821], 0, 1.0537893772125244, 1563478419.5314467], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 24, 112, 112], "float32"], ["TENSOR", [24, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 24, 112, 112, "float32"], [24, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 2849920, "t": "direct", "c": null, "e": [["tile_f", "sp", [24, 1, 1, 1]], ["tile_y", "sp", [4, 1, 14, 1]], ["tile_x", "sp", [2, 1, 14, 2]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003851019], 0, 5.521921396255493, 1563480097.1566155], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 24, 56, 56], "float32"], ["TENSOR", [6, 24, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 24, 56, 56, "float32"], [6, 24, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1777962, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 3, 1, 2]], ["tile_y", "sp", [14, 4, 1, 1]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [12, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00022916619999999998], 0, 5.636163711547852, 1563480747.843647], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 36, 56, 56], "float32"], ["TENSOR", [36, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 36, 56, 56, "float32"], [36, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 2784409, "t": "direct", "c": null, "e": [["tile_f", "sp", [18, 1, 2, 1]], ["tile_y", "sp", [7, 8, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005557666000000001], 0, 5.998594284057617, 1563481734.4010372], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 36, 56, 56], "float32"], ["TENSOR", [6, 36, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 36, 56, 56, "float32"], [6, 36, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4645149, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 1, 3]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [36, 1]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003213082], 0, 5.385282278060913, 1563483660.12582], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 6, 56, 56], "float32"], ["TENSOR", [36, 6, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 6, 56, 56, "float32"], [36, 6, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6632941, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 18, 1, 2]], ["tile_y", "sp", [14, 2, 2, 1]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [2, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.000279365], 0, 1.3627278804779053, 1563484166.401688], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 36, 56, 56], "float32"], ["TENSOR", [36, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 36, 56, 56, "float32"], [36, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 708200, "t": "direct", "c": null, "e": [["tile_f", "sp", [36, 1, 1, 1]], ["tile_y", "sp", [7, 4, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.00047366559999999995], 0, 5.306597471237183, 1563485127.6013975], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 36, 28, 28], "float32"], ["TENSOR", [8, 36, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 36, 28, 28, "float32"], [8, 36, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1581812, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 1, 2]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [6, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001309714], 0, 5.595698356628418, 1563486342.2145631], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 48, 28, 28], "float32"], ["TENSOR", [48, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 48, 28, 28, "float32"], [48, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1220100, "t": "direct", "c": null, "e": [["tile_f", "sp", [48, 1, 1, 1]], ["tile_y", "sp", [1, 1, 4, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003184562], 0, 2.855104923248291, 1563487223.9120524], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 48, 28, 28], "float32"], ["TENSOR", [8, 48, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 48, 28, 28, "float32"], [8, 48, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1421817, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 1, 4]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [8, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001621127], 0, 3.984964609146118, 1563488039.9753559], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 8, 28, 28], "float32"], ["TENSOR", [48, 8, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 8, 28, 28, "float32"], [48, 8, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3232299, "t": "direct", "c": null, "e": [["tile_f", "sp", [3, 2, 1, 8]], ["tile_y", "sp", [7, 2, 2, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [2, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001521698], 0, 1.4207863807678223, 1563490152.4042702], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 48, 28, 28], "float32"], ["TENSOR", [48, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 48, 28, 28, "float32"], [48, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 196420, "t": "direct", "c": null, "e": [["tile_f", "sp", [48, 1, 1, 1]], ["tile_y", "sp", [1, 1, 7, 2]], ["tile_x", "sp", [1, 2, 7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002929955], 0, 4.772768974304199, 1563490594.3126194], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 48, 14, 14], "float32"], ["TENSOR", [16, 48, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 48, 14, 14, "float32"], [16, 48, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 139206, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 4, 1]], ["tile_y", "sp", [7, 1, 1, 2]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [6, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.00010335100000000001], 0, 4.8592848777771, 1563492032.0478868], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 96, 14, 14], "float32"], ["TENSOR", [16, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 14, 14, "float32"], [16, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 382529, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 16, 1]], ["tile_y", "sp", [7, 2, 1, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["tile_rc", "sp", [8, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[9.28978e-05], 0, 0.9375708103179932, 1563492425.808311], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 16, 14, 14], "float32"], ["TENSOR", [96, 16, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 16, 14, 14, "float32"], [96, 16, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1291498, "t": "direct", "c": null, "e": [["tile_f", "sp", [6, 2, 2, 4]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [4, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001487999], 0, 4.89414119720459, 1563493531.6823072], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 96, 14, 14], "float32"], ["TENSOR", [96, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 96, 14, 14, "float32"], [96, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 318540, "t": "direct", "c": null, "e": [["tile_f", "sp", [48, 1, 2, 1]], ["tile_y", "sp", [1, 1, 2, 7]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00021999950000000003], 0, 1.775022029876709, 1563494472.8527527], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 96, 14, 14], "float32"], ["TENSOR", [24, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 14, 14, "float32"], [24, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 133621, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 6, 4, 1]], ["tile_y", "sp", [2, 1, 7, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [8, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0001512323], 0, 2.8360612392425537, 1563495120.2439334], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 144, 14, 14], "float32"], ["TENSOR", [144, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 144, 14, 14, "float32"], [144, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 404965, "t": "direct", "c": null, "e": [["tile_f", "sp", [72, 1, 2, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002845267], 0, 2.099392890930176, 1563496060.4253402], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 144, 14, 14], "float32"], ["TENSOR", [24, 144, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 144, 14, 14, "float32"], [24, 144, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 748306, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 6, 4]], ["tile_y", "sp", [7, 1, 1, 2]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 9]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00028049009999999997], 0, 6.290288686752319, 1563497068.1796682], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 24, 14, 14], "float32"], ["TENSOR", [144, 24, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 24, 14, 14, "float32"], [144, 24, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1839881, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 4, 1, 9]], ["tile_y", "sp", [1, 1, 14, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [4, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0001763473], 0, 2.455428123474121, 1563498131.4769075], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 144, 14, 14], "float32"], ["TENSOR", [144, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 144, 14, 14, "float32"], [144, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 28868, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 1, 3, 3]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [7, 1, 1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.000200004], 0, 4.584266662597656, 1563499800.1272783], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 144, 7, 7], "float32"], ["TENSOR", [40, 144, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 144, 7, 7, "float32"], [40, 144, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 82150, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 1, 1, 10]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [7, 1, 1, 1]], ["tile_rc", "sp", [24, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001551958], 0, 7.283468723297119, 1563500373.413413], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 240, 7, 7], "float32"], ["TENSOR", [40, 240, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 240, 7, 7, "float32"], [40, 240, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 134146, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 8, 5]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 1, 7]], ["tile_rc", "sp", [48, 5]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002378851], 0, 7.720376491546631, 1563501392.691263], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 40, 7, 7], "float32"], ["TENSOR", [240, 40, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 40, 7, 7, "float32"], [240, 40, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 95344, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 15, 1, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [10, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.00023081159999999997], 0, 5.112323522567749, 1563502613.459936], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 240, 7, 7], "float32"], ["TENSOR", [240, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 240, 7, 7, "float32"], [240, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 14695, "t": "direct", "c": null, "e": [["tile_f", "sp", [120, 1, 1, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 0]]}], "r": [[0.00015427920000000002], 0, 2.699763059616089, 1563503700.2403505], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 240, 7, 7], "float32"], ["TENSOR", [80, 240, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 240, 7, 7, "float32"], [80, 240, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 167179, "t": "direct", "c": null, "e": [["tile_f", "sp", [10, 2, 4, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [6, 40]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001935716], 0, 8.121056318283081, 1563504467.4629724], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 80, 7, 7], "float32"], ["TENSOR", [1280, 80, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 80, 7, 7, "float32"], [1280, 80, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 144021, "t": "direct", "c": null, "e": [["tile_f", "sp", [40, 16, 1, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [16, 5]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.00039073739999999997], 0, 1.4516119956970215, 1563505310.082], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 1280, 1, 1], "float32"], ["TENSOR", [1000, 1280, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1280, 1, 1, "float32"], [1000, 1280, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2063, "t": "direct", "c": null, "e": [["tile_f", "sp", [10, 5, 20, 1]], ["tile_y", "sp", [1, 1, 1, 1]], ["tile_x", "sp", [1, 1, 1, 1]], ["tile_rc", "sp", [128, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0017111868999999998], 0, 2.921250581741333, 1563506709.5695226], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [16, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [16, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 28995398, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 2, 8]], ["tile_y", "sp", [14, 1, 4, 2]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [3, 1]], ["tile_ry", "sp", [1, 3]], ["tile_rx", "sp", [3, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0006420646], 0, 5.660494089126587, 1563571858.1381931], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 16, 112, 112], "float32"], ["TENSOR", [16, 16, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 16, 112, 112, "float32"], [16, 16, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 18041804, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 16, 1, 1]], ["tile_y", "sp", [112, 1, 1, 1]], ["tile_x", "sp", [2, 1, 56, 1]], ["tile_rc", "sp", [8, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002752031], 0, 1.8210670948028564, 1563574714.6377528], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 16, 112, 112], "float32"], ["TENSOR", [16, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 16, 112, 112, "float32"], [16, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 3623761, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 2, 1, 1]], ["tile_y", "sp", [14, 2, 1, 4]], ["tile_x", "sp", [4, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003648716], 0, 1.3725965023040771, 1563576563.865319], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 16, 112, 112], "float32"], ["TENSOR", [8, 16, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 16, 112, 112, "float32"], [8, 16, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5932958, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 2, 4]], ["tile_y", "sp", [7, 1, 1, 16]], ["tile_x", "sp", [28, 1, 4, 1]], ["tile_rc", "sp", [16, 1]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.000512458], 0, 3.501155138015747, 1563577402.841827], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 8, 112, 112], "float32"], ["TENSOR", [48, 8, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 8, 112, 112, "float32"], [48, 8, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 58398907, "t": "direct", "c": null, "e": [["tile_f", "sp", [3, 16, 1, 1]], ["tile_y", "sp", [28, 1, 1, 4]], ["tile_x", "sp", [4, 1, 28, 1]], ["tile_rc", "sp", [4, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005922150999999999], 0, 7.805870056152344, 1563580476.580741], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 48, 112, 112], "float32"], ["TENSOR", [48, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 48, 112, 112, "float32"], [48, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 3902640, "t": "direct", "c": null, "e": [["tile_f", "sp", [48, 1, 1, 1]], ["tile_y", "sp", [14, 1, 2, 2]], ["tile_x", "sp", [1, 2, 28, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.00047931149999999996], 0, 5.9140424728393555, 1563582987.4396615], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 48, 56, 56], "float32"], ["TENSOR", [12, 48, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 48, 56, 56, "float32"], [12, 48, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8284874, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 3, 1, 4]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [16, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001336433], 0, 1.495875597000122, 1563583746.1203547], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 72, 56, 56], "float32"], ["TENSOR", [72, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 72, 56, 56, "float32"], [72, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 3004800, "t": "direct", "c": null, "e": [["tile_f", "sp", [72, 1, 1, 1]], ["tile_y", "sp", [2, 1, 4, 7]], ["tile_x", "sp", [2, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00044896199999999996], 0, 1.7853186130523682, 1563587728.3463523], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 72, 56, 56], "float32"], ["TENSOR", [12, 72, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 72, 56, 56, "float32"], [12, 72, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 12894261, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 6, 1, 2]], ["tile_y", "sp", [14, 1, 2, 2]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [24, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004206178], 0, 5.912015199661255, 1563588945.6376362], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 12, 56, 56], "float32"], ["TENSOR", [72, 12, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 12, 56, 56, "float32"], [72, 12, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 10883089, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 2, 9, 2]], ["tile_y", "sp", [7, 2, 4, 1]], ["tile_x", "sp", [7, 1, 4, 2]], ["tile_rc", "sp", [4, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004735093], 0, 7.310347557067871, 1563590553.7575202], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 72, 56, 56], "float32"], ["TENSOR", [72, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 72, 56, 56, "float32"], [72, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 1730000, "t": "direct", "c": null, "e": [["tile_f", "sp", [72, 1, 1, 1]], ["tile_y", "sp", [7, 1, 4, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002993967], 0, 5.477226495742798, 1563592394.0816438], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 72, 28, 28], "float32"], ["TENSOR", [16, 72, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 72, 28, 28, "float32"], [16, 72, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 920463, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 2, 8]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [12, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[7.04545e-05], 0, 2.233208179473877, 1563594480.1246781], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 96, 28, 28], "float32"], ["TENSOR", [96, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 96, 28, 28, "float32"], [96, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1952160, "t": "direct", "c": null, "e": [["tile_f", "sp", [96, 1, 1, 1]], ["tile_y", "sp", [1, 1, 4, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005853557], 0, 2.4691832065582275, 1563595862.2960944], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 96, 28, 28], "float32"], ["TENSOR", [16, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 28, 28, "float32"], [16, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 975864, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 2, 4]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [12, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0002746461], 0, 4.10405158996582, 1563597328.355284], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 16, 28, 28], "float32"], ["TENSOR", [96, 16, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 16, 28, 28, "float32"], [96, 16, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 10188897, "t": "direct", "c": null, "e": [["tile_f", "sp", [3, 8, 4, 1]], ["tile_y", "sp", [14, 1, 2, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [2, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003809035], 0, 1.194957971572876, 1563598596.8397884], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 96, 28, 28], "float32"], ["TENSOR", [96, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 96, 28, 28, "float32"], [96, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 317184, "t": "direct", "c": null, "e": [["tile_f", "sp", [96, 1, 1, 1]], ["tile_y", "sp", [1, 1, 14, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005252032], 0, 1.7279746532440186, 1563599843.5571277], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 96, 14, 14], "float32"], ["TENSOR", [32, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 14, 14, "float32"], [32, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 608640, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 4, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 2, 7, 1]], ["tile_rc", "sp", [8, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002504947], 0, 2.5403547286987305, 1563602073.484461], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 192, 14, 14], "float32"], ["TENSOR", [32, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 14, 14, "float32"], [32, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 93705, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 8, 1]], ["tile_y", "sp", [7, 1, 1, 2]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.00016133169999999999], 0, 0.9194610118865967, 1563603409.6528583], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 32, 14, 14], "float32"], ["TENSOR", [192, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 14, 14, "float32"], [192, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2367355, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 1, 8, 6]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [4, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001905092], 0, 1.9229981899261475, 1563604676.4655375], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 192, 14, 14], "float32"], ["TENSOR", [192, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 192, 14, 14, "float32"], [192, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 391790, "t": "direct", "c": null, "e": [["tile_f", "sp", [96, 1, 2, 1]], ["tile_y", "sp", [1, 1, 2, 7]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003669448], 0, 1.0925202369689941, 1563606583.4241602], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 192, 14, 14], "float32"], ["TENSOR", [48, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 14, 14, "float32"], [48, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 65837, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 1, 12, 1]], ["tile_y", "sp", [2, 1, 7, 1]], ["tile_x", "sp", [1, 2, 1, 7]], ["tile_rc", "sp", [96, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0003705284], 0, 7.918815851211548, 1563608540.3333068], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 288, 14, 14], "float32"], ["TENSOR", [288, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 288, 14, 14, "float32"], [288, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 646818, "t": "direct", "c": null, "e": [["tile_f", "sp", [144, 1, 2, 1]], ["tile_y", "sp", [1, 14, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.00041494089999999996], 0, 5.462493419647217, 1563868864.0429678], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 288, 14, 14], "float32"], ["TENSOR", [48, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 14, 14, "float32"], [48, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 840727, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 6, 4, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 2, 7, 1]], ["tile_rc", "sp", [36, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004624472], 0, 2.9920666217803955, 1563865413.6820064], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 48, 14, 14], "float32"], ["TENSOR", [288, 48, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 48, 14, 14, "float32"], [288, 48, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5953802, "t": "direct", "c": null, "e": [["tile_f", "sp", [12, 2, 1, 12]], ["tile_y", "sp", [1, 2, 7, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [24, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001645762], 0, 1.7249259948730469, 1563867240.1376846], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 288, 14, 14], "float32"], ["TENSOR", [288, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 288, 14, 14, "float32"], [288, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 49930, "t": "direct", "c": null, "e": [["tile_f", "sp", [18, 1, 16, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003643564], 0, 4.674288511276245, 1563615060.225669], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 288, 7, 7], "float32"], ["TENSOR", [80, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 7, 7, "float32"], [80, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 55739, "t": "direct", "c": null, "e": [["tile_f", "sp", [10, 2, 4, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 1, 7]], ["tile_rc", "sp", [32, 9]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.00025511990000000003], 0, 6.906548261642456, 1563615953.9113], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 480, 7, 7], "float32"], ["TENSOR", [80, 480, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 480, 7, 7, "float32"], [80, 480, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 234405, "t": "direct", "c": null, "e": [["tile_f", "sp", [40, 1, 1, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [40, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0006317161], 0, 5.954474687576294, 1563617032.2924523], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 80, 7, 7], "float32"], ["TENSOR", [480, 80, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 80, 7, 7, "float32"], [480, 80, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 638947, "t": "direct", "c": null, "e": [["tile_f", "sp", [10, 6, 8, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [10, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.00013598200000000001], 0, 1.7992403507232666, 1563618918.372097], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 480, 7, 7], "float32"], ["TENSOR", [480, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 480, 7, 7, "float32"], [480, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 23522, "t": "direct", "c": null, "e": [["tile_f", "sp", [40, 2, 3, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 0]]}], "r": [[0.0002233901], 0, 1.3864057064056396, 1563620451.0676944], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 480, 7, 7], "float32"], ["TENSOR", [160, 480, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 480, 7, 7, "float32"], [160, 480, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 113167, "t": "direct", "c": null, "e": [["tile_f", "sp", [10, 1, 16, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [48, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004995155], 0, 1.1077353954315186, 1563621051.6641092], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 160, 7, 7], "float32"], ["TENSOR", [1280, 160, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 7, 7, "float32"], [1280, 160, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 532050, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 8, 20, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 7, 1, 1]], ["tile_rc", "sp", [40, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004284881], 0, 4.113452672958374, 1563623391.2203417], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 24, 112, 112], "float32"], ["TENSOR", [24, 24, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 24, 112, 112, "float32"], [24, 24, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 45819257, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 8, 3]], ["tile_y", "sp", [112, 1, 1, 1]], ["tile_x", "sp", [2, 7, 8, 1]], ["tile_rc", "sp", [3, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005663862000000001], 0, 5.783212900161743, 1563835775.2112288], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 24, 112, 112], "float32"], ["TENSOR", [12, 24, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 24, 112, 112, "float32"], [12, 24, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 15133038, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 2, 6]], ["tile_y", "sp", [56, 1, 1, 2]], ["tile_x", "sp", [2, 1, 56, 1]], ["tile_rc", "sp", [6, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004327836], 0, 6.417611122131348, 1563837665.7587535], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 12, 112, 112], "float32"], ["TENSOR", [72, 12, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 12, 112, 112, "float32"], [72, 12, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 114716584, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 1, 3, 12]], ["tile_y", "sp", [28, 4, 1, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [1, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0007520981], 0, 8.184881448745728, 1563839493.7001426], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 72, 112, 112], "float32"], ["TENSOR", [72, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 72, 112, 112, "float32"], [72, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 5492400, "t": "direct", "c": null, "e": [["tile_f", "sp", [72, 1, 1, 1]], ["tile_y", "sp", [7, 1, 8, 1]], ["tile_x", "sp", [1, 7, 8, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0008188178000000001], 0, 5.28718376159668, 1563842729.4613657], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 72, 56, 56], "float32"], ["TENSOR", [18, 72, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 72, 56, 56, "float32"], [18, 72, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1112702, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 3, 3, 2]], ["tile_y", "sp", [8, 1, 1, 7]], ["tile_x", "sp", [2, 1, 28, 1]], ["tile_rc", "sp", [12, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0003159428], 0, 1.8616046905517578, 1563844490.649786], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 108, 56, 56], "float32"], ["TENSOR", [108, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 108, 56, 56, "float32"], [108, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 6661800, "t": "direct", "c": null, "e": [["tile_f", "sp", [108, 1, 1, 1]], ["tile_y", "sp", [1, 1, 56, 1]], ["tile_x", "sp", [2, 7, 4, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0007611874999999999], 0, 6.743911504745483, 1563846955.2478983], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 108, 56, 56], "float32"], ["TENSOR", [18, 108, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 108, 56, 56, "float32"], [18, 108, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4188894, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 3, 6, 1]], ["tile_y", "sp", [14, 4, 1, 1]], ["tile_x", "sp", [1, 1, 56, 1]], ["tile_rc", "sp", [18, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.00044569630000000003], 0, 1.033301591873169, 1563848279.097361], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 18, 56, 56], "float32"], ["TENSOR", [108, 18, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 18, 56, 56, "float32"], [108, 18, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 6848165, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 2, 3, 9]], ["tile_y", "sp", [56, 1, 1, 1]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [1, 18]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004656236], 0, 1.6690847873687744, 1563850998.7228894], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 108, 56, 56], "float32"], ["TENSOR", [108, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 108, 56, 56, "float32"], [108, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 1685200, "t": "direct", "c": null, "e": [["tile_f", "sp", [108, 1, 1, 1]], ["tile_y", "sp", [1, 1, 14, 2]], ["tile_x", "sp", [7, 1, 4, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0003303451], 0, 2.061370372772217, 1563853791.4469223], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 108, 28, 28], "float32"], ["TENSOR", [24, 108, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 108, 28, 28, "float32"], [24, 108, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8118632, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 4, 6]], ["tile_y", "sp", [7, 4, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [27, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002859697], 0, 3.3151755332946777, 1563855437.4711468], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 144, 28, 28], "float32"], ["TENSOR", [144, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 144, 28, 28, "float32"], [144, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 2977100, "t": "direct", "c": null, "e": [["tile_f", "sp", [144, 1, 1, 1]], ["tile_y", "sp", [1, 1, 14, 2]], ["tile_x", "sp", [4, 1, 7, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004019193], 0, 2.3931376934051514, 1563857451.5451305], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 144, 28, 28], "float32"], ["TENSOR", [24, 144, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 144, 28, 28, "float32"], [24, 144, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8119912, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 4, 6]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [36, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005485007], 0, 1.3341212272644043, 1563860410.277848], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 24, 28, 28], "float32"], ["TENSOR", [144, 24, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 24, 28, 28, "float32"], [144, 24, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 23176560, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 24, 6]], ["tile_y", "sp", [14, 1, 1, 2]], ["tile_x", "sp", [2, 1, 14, 1]], ["tile_rc", "sp", [12, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00047266060000000006], 0, 1.5779361724853516, 1563861390.1890488], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 144, 28, 28], "float32"], ["TENSOR", [144, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 144, 28, 28, "float32"], [144, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 136850, "t": "direct", "c": null, "e": [["tile_f", "sp", [144, 1, 1, 1]], ["tile_y", "sp", [1, 2, 7, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 0]]}], "r": [[0.0001910091], 0, 7.449353218078613, 1563862795.8002825], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 144, 14, 14], "float32"], ["TENSOR", [48, 144, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 144, 14, 14, "float32"], [48, 144, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2383711, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 3, 8, 2]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 9]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.00026007819999999997], 0, 2.290708541870117, 1563863760.223789], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 288, 14, 14], "float32"], ["TENSOR", [72, 288, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 288, 14, 14, "float32"], [72, 288, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4021689, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 2, 9, 2]], ["tile_y", "sp", [2, 1, 1, 7]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 9]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001625815], 0, 1.689070224761963, 1563870680.1814914], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 432, 14, 14], "float32"], ["TENSOR", [432, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 432, 14, 14, "float32"], [432, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1029036, "t": "direct", "c": null, "e": [["tile_f", "sp", [144, 1, 3, 1]], ["tile_y", "sp", [1, 1, 2, 7]], ["tile_x", "sp", [1, 1, 7, 2]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005124203], 0, 4.450181722640991, 1563871505.3293133], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 432, 14, 14], "float32"], ["TENSOR", [72, 432, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 432, 14, 14, "float32"], [72, 432, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3302850, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 3, 12, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [72, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004148991], 0, 3.813995599746704, 1563873430.5281537], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 72, 14, 14], "float32"], ["TENSOR", [432, 72, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 72, 14, 14, "float32"], [432, 72, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4900191, "t": "direct", "c": null, "e": [["tile_f", "sp", [27, 2, 4, 2]], ["tile_y", "sp", [1, 1, 14, 1]], ["tile_x", "sp", [1, 7, 2, 1]], ["tile_rc", "sp", [18, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00043509789999999994], 0, 1.0989494323730469, 1563875996.5277498], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 432, 14, 14], "float32"], ["TENSOR", [432, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 432, 14, 14, "float32"], [432, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 23883, "t": "direct", "c": null, "e": [["tile_f", "sp", [48, 1, 9, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [7, 1, 1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00025497409999999997], 0, 1.5585696697235107, 1563876746.8277965], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 432, 7, 7], "float32"], ["TENSOR", [120, 432, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 432, 7, 7, "float32"], [120, 432, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 141830, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 5, 12, 1]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [36, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0002868705], 0, 8.323650360107422, 1563878011.5012248], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 720, 7, 7], "float32"], ["TENSOR", [120, 720, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 720, 7, 7, "float32"], [120, 720, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 32723, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 2, 30, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 7, 1, 1]], ["tile_rc", "sp", [90, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004971718], 0, 1.8913400173187256, 1563878892.4705105], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 120, 7, 7], "float32"], ["TENSOR", [720, 120, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 120, 7, 7, "float32"], [720, 120, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 799821, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 3, 30, 2]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [30, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0001851704], 0, 1.1566121578216553, 1563880638.2408812], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 720, 7, 7], "float32"], ["TENSOR", [720, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 720, 7, 7, "float32"], [720, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 104227, "t": "direct", "c": null, "e": [["tile_f", "sp", [45, 1, 4, 4]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.00022123350000000002], 0, 1.1726312637329102, 1563881834.9466856], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 720, 7, 7], "float32"], ["TENSOR", [240, 720, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 720, 7, 7, "float32"], [240, 720, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 606799, "t": "direct", "c": null, "e": [["tile_f", "sp", [3, 1, 20, 4]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [80, 9]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0005188389], 0, 5.968128442764282, 1563883477.5756733], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 240, 7, 7], "float32"], ["TENSOR", [1280, 240, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 240, 7, 7, "float32"], [1280, 240, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 239932, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 2, 8, 5]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [80, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0007921566], 0, 1.8928780555725098, 1563884247.2643268], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [32, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 112, 112, "float32"], [32, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 9024444, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 4, 4]], ["tile_y", "sp", [56, 1, 2, 1]], ["tile_x", "sp", [2, 7, 8, 1]], ["tile_rc", "sp", [8, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0008152662999999999], 0, 3.3597683906555176, 1563905762.1451051], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [16, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 112, 112, "float32"], [16, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 10472347, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 1, 8]], ["tile_y", "sp", [14, 1, 8, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [4, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0005172028000000001], 0, 1.812384843826294, 1563906836.9931555], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 16, 112, 112], "float32"], ["TENSOR", [96, 16, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 16, 112, 112, "float32"], [96, 16, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 62626271, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 16, 3]], ["tile_y", "sp", [56, 2, 1, 1]], ["tile_x", "sp", [7, 1, 16, 1]], ["tile_rc", "sp", [1, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0009247605000000001], 0, 1.6357882022857666, 1563908952.7851167], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 96, 112, 112], "float32"], ["TENSOR", [96, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 96, 112, 112, "float32"], [96, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 8010464, "t": "direct", "c": null, "e": [["tile_f", "sp", [96, 1, 1, 1]], ["tile_y", "sp", [28, 2, 1, 1]], ["tile_x", "sp", [1, 1, 28, 2]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0008477808999999999], 0, 2.1602909564971924, 1563910528.925485], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 96, 56, 56], "float32"], ["TENSOR", [24, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 56, 56, "float32"], [24, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1205665, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 3, 4]], ["tile_y", "sp", [28, 1, 1, 2]], ["tile_x", "sp", [1, 2, 28, 1]], ["tile_rc", "sp", [32, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004214724], 0, 1.6269094944000244, 1563910863.0835428], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 144, 56, 56], "float32"], ["TENSOR", [144, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 144, 56, 56, "float32"], [144, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 7345453, "t": "direct", "c": null, "e": [["tile_f", "sp", [36, 4, 1, 1]], ["tile_y", "sp", [2, 1, 28, 1]], ["tile_x", "sp", [7, 1, 8, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0008883621], 0, 1.1234588623046875, 1563912061.6719525], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 144, 56, 56], "float32"], ["TENSOR", [24, 144, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 144, 56, 56, "float32"], [24, 144, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 25775306, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 3, 8, 1]], ["tile_y", "sp", [14, 2, 1, 2]], ["tile_x", "sp", [2, 1, 28, 1]], ["tile_rc", "sp", [18, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0006234606], 0, 0.9741504192352295, 1563913508.4095228], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 24, 56, 56], "float32"], ["TENSOR", [144, 24, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 24, 56, 56, "float32"], [144, 24, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 5175499, "t": "direct", "c": null, "e": [["tile_f", "sp", [6, 4, 6, 1]], ["tile_y", "sp", [7, 1, 1, 8]], ["tile_x", "sp", [4, 1, 14, 1]], ["tile_rc", "sp", [8, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0005672926], 0, 0.9680719375610352, 1563915365.8073242], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 144, 56, 56], "float32"], ["TENSOR", [144, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 144, 56, 56, "float32"], [144, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 3166100, "t": "direct", "c": null, "e": [["tile_f", "sp", [144, 1, 1, 1]], ["tile_y", "sp", [14, 1, 2, 1]], ["tile_x", "sp", [1, 1, 14, 2]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0006848055], 0, 4.307114839553833, 1563916349.0590005], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 144, 28, 28], "float32"], ["TENSOR", [32, 144, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 144, 28, 28, "float32"], [32, 144, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 4251011, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 4, 8]], ["tile_y", "sp", [4, 1, 1, 7]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [48, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001875872], 0, 1.1498961448669434, 1563917676.8238914], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 192, 28, 28], "float32"], ["TENSOR", [32, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 28, 28, "float32"], [32, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3799486, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 1, 2, 16]], ["tile_y", "sp", [7, 2, 2, 1]], ["tile_x", "sp", [1, 2, 14, 1]], ["tile_rc", "sp", [192, 1]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.00040310709999999997], 0, 1.8321261405944824, 1563919318.0209608], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 32, 28, 28], "float32"], ["TENSOR", [192, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 28, 28, "float32"], [192, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8830118, "t": "direct", "c": null, "e": [["tile_f", "sp", [6, 8, 4, 1]], ["tile_y", "sp", [28, 1, 1, 1]], ["tile_x", "sp", [1, 1, 28, 1]], ["tile_rc", "sp", [2, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00042031589999999997], 0, 3.5569820404052734, 1563921013.8294067], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 192, 14, 14], "float32"], ["TENSOR", [64, 192, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 14, 14, "float32"], [64, 192, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1623740, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 4, 8, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [24, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001669982], 0, 1.3072373867034912, 1563922527.3568666], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 384, 14, 14], "float32"], ["TENSOR", [64, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 14, 14, "float32"], [64, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1838028, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 4, 16, 1]], ["tile_y", "sp", [7, 1, 1, 2]], ["tile_x", "sp", [1, 2, 7, 1]], ["tile_rc", "sp", [48, 8]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004128991], 0, 1.5514931678771973, 1563924611.140329], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 64, 14, 14], "float32"], ["TENSOR", [384, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 14, 14, "float32"], [384, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 179060, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 6, 2, 1]], ["tile_y", "sp", [1, 7, 2, 1]], ["tile_x", "sp", [1, 2, 7, 1]], ["tile_rc", "sp", [32, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0003517311], 0, 3.1334125995635986, 1563925865.2593381], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 384, 14, 14], "float32"], ["TENSOR", [96, 384, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 14, 14, "float32"], [96, 384, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 2209859, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 6, 8, 2]], ["tile_y", "sp", [7, 1, 1, 2]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [32, 12]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0005185574], 0, 3.6280014514923096, 1563926861.285745], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 576, 14, 14], "float32"], ["TENSOR", [576, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 576, 14, 14, "float32"], [576, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1226453, "t": "direct", "c": null, "e": [["tile_f", "sp", [144, 1, 4, 1]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 1, 7, 2]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00041980409999999995], 0, 3.8443856239318848, 1563927968.3188756], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 576, 14, 14], "float32"], ["TENSOR", [96, 576, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 576, 14, 14, "float32"], [96, 576, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1338207, "t": "direct", "c": null, "e": [["tile_f", "sp", [6, 4, 4, 1]], ["tile_y", "sp", [2, 1, 7, 1]], ["tile_x", "sp", [1, 7, 2, 1]], ["tile_rc", "sp", [192, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0003578874], 0, 1.2072250843048096, 1563929824.2421997], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 96, 14, 14], "float32"], ["TENSOR", [576, 96, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 96, 14, 14, "float32"], [576, 96, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8710019, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 9, 4, 1]], ["tile_y", "sp", [7, 2, 1, 1]], ["tile_x", "sp", [1, 1, 14, 1]], ["tile_rc", "sp", [16, 6]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0006625660000000001], 0, 4.051753044128418, 1563931051.1020265], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 576, 14, 14], "float32"], ["TENSOR", [576, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 576, 14, 14, "float32"], [576, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 42250, "t": "direct", "c": null, "e": [["tile_f", "sp", [24, 1, 12, 2]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [7, 1, 1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.00040676339999999996], 0, 5.312792539596558, 1563932106.775524], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 576, 7, 7], "float32"], ["TENSOR", [160, 576, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 576, 7, 7, "float32"], [160, 576, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 89203, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 1, 20, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 1, 7]], ["tile_rc", "sp", [144, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.0003696533], 0, 6.698707342147827, 1563932840.814277], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 960, 7, 7], "float32"], ["TENSOR", [160, 960, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 960, 7, 7, "float32"], [160, 960, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 409945, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 5, 4, 1]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 7, 1, 1]], ["tile_rc", "sp", [320, 3]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 1]]}], "r": [[0.0004774366], 0, 1.0330867767333984, 1563933976.373431], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 160, 7, 7], "float32"], ["TENSOR", [960, 160, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 160, 7, 7, "float32"], [960, 160, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 123026, "t": "direct", "c": null, "e": [["tile_f", "sp", [12, 1, 16, 5]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [16, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.0004579103], 0, 2.914686679840088, 1563935190.8175657], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 960, 7, 7], "float32"], ["TENSOR", [960, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 960, 7, 7, "float32"], [960, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 100039, "t": "direct", "c": null, "e": [["tile_f", "sp", [48, 1, 5, 4]], ["tile_y", "sp", [1, 1, 7, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001617532], 0, 1.1516392230987549, 1563936283.3845649], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 960, 7, 7], "float32"], ["TENSOR", [320, 960, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 960, 7, 7, "float32"], [320, 960, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 342516, "t": "direct", "c": null, "e": [["tile_f", "sp", [5, 2, 16, 2]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [96, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.0007919168000000001], 0, 3.107330083847046, 1563937889.7687075], "v": 0.1} {"i": ["cuda -model=jetson-nano", "topi_nn_conv2d", [["TENSOR", [1, 320, 7, 7], "float32"], ["TENSOR", [1280, 320, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 320, 7, 7, "float32"], [1280, 320, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 502476, "t": "direct", "c": null, "e": [["tile_f", "sp", [20, 1, 32, 2]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [32, 10]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0005872568], 0, 1.0580263137817383, 1563939211.8394237], "v": 0.1}